From 98c2048d30d0a6e324875c98a4f138e7ee92734a Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 31 Mar 2024 22:12:03 +0200 Subject: [PATCH 01/54] try to improve Storage S3 selection glob performance --- src/Storages/StorageS3.cpp | 122 +++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 24 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf..cee9f11af95 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -210,32 +210,36 @@ public: if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == globbed_uri.key.size()) + for (const auto & key : expandSelectionGlob(globbed_uri.key)) { - buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); - buffer_iter = buffer.begin(); - is_finished = true; - return; + const String key_prefix = key.substr(0, key.find_first_of("*?{")); + + /// We don't have to list bucket, because there is no asterisks. + if (key_prefix.size() == key.size()) + { + buffer.emplace_back(std::make_shared(key, std::nullopt)); + buffer_iter = buffer.begin(); + is_finished = true; + return; + } + + request.SetBucket(globbed_uri.bucket); + request.SetPrefix(key_prefix); + request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); + + outcome_future = listObjectsAsync(); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(key)); + if (!matcher->ok()) + throw Exception( + ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", key, matcher->error()); + + recursive = key == "/**"; + + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + updateInternalBufferAssumeLocked(); } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); - - recursive = globbed_uri.key == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); + buffer_iter = buffer.begin(); } KeyWithInfoPtr next(size_t) @@ -301,6 +305,76 @@ private: } while (true); } + void updateInternalBufferAssumeLocked() + { + assert(outcome_future.valid()); + auto outcome = outcome_future.get(); + + if (!outcome.IsSuccess()) + { + throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + quoteString(request.GetBucket()), quoteString(request.GetPrefix()), + backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); + } + + const auto & result_batch = outcome.GetResult().GetContents(); + + /// It returns false when all objects were returned + is_finished = !outcome.GetResult().GetIsTruncated(); + + if (!is_finished) + { + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
+ list_objects_pool.wait(); + outcome_future = listObjectsAsync(); + } + + if (request_settings.throw_on_zero_files_match && result_batch.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); + + KeysWithInfo temp_buffer; + temp_buffer.reserve(result_batch.size()); + + for (const auto & row : result_batch) + { + String key = row.GetKey(); + if (recursive || re2::RE2::FullMatch(key, *matcher)) + { + S3::ObjectInfo info = + { + .size = size_t(row.GetSize()), + .last_modification_time = row.GetLastModified().Millis() / 1000, + }; + temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); + } + } + + if (temp_buffer.empty()) + return; + + if (filter_dag) + { + std::vector paths; + paths.reserve(temp_buffer.size()); + for (const auto & key_with_info : temp_buffer) + paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); + + VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); + } + + buffer.insert(buffer.end(), temp_buffer.begin(), temp_buffer.end()); + + if (read_keys) + read_keys->insert(read_keys->end(), temp_buffer.begin(), temp_buffer.end()); + + if (file_progress_callback) + { + for (const auto & key_with_info : buffer) + file_progress_callback(FileProgress(0, key_with_info->info->size)); + } + } + void fillInternalBufferAssumeLocked() { buffer.clear(); From 73b9ef99f4315e89d6f184d836e01da4345151ba Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 1 Apr 2024 17:40:40 +0200 Subject: [PATCH 02/54] Revert "try to improve Storage S3 selection glob performance" This reverts commit 9c9421b6897bf4a95346cef52171839ef67bd522. --- src/Storages/StorageS3.cpp | 122 ++++++++----------------------------- 1 file changed, 24 insertions(+), 98 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index cee9f11af95..2d3aef312bf 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -210,36 +210,32 @@ public: if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - for (const auto & key : expandSelectionGlob(globbed_uri.key)) + const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); + + /// We don't have to list bucket, because there is no asterisks. + if (key_prefix.size() == globbed_uri.key.size()) { - const String key_prefix = key.substr(0, key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == key.size()) - { - buffer.emplace_back(std::make_shared(key, std::nullopt)); - buffer_iter = buffer.begin(); - is_finished = true; - return; - } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(key)); - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", key, matcher->error()); - - recursive = key == "/**"; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - updateInternalBufferAssumeLocked(); + buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); + buffer_iter = buffer.begin(); + is_finished = true; + return; } - buffer_iter = buffer.begin(); + + request.SetBucket(globbed_uri.bucket); + request.SetPrefix(key_prefix); + request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); + + outcome_future = listObjectsAsync(); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); + + recursive = globbed_uri.key == "/**" ? true : false; + + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + fillInternalBufferAssumeLocked(); } KeyWithInfoPtr next(size_t) @@ -305,76 +301,6 @@ private: } while (true); } - void updateInternalBufferAssumeLocked() - { - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - return; - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer.insert(buffer.end(), temp_buffer.begin(), temp_buffer.end()); - - if (read_keys) - read_keys->insert(read_keys->end(), temp_buffer.begin(), temp_buffer.end()); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - } - void fillInternalBufferAssumeLocked() { buffer.clear(); From 70da13b9b01d0e9a86b313bdcf165b6c54a4b985 Mon Sep 17 00:00:00 2001 From: zvonand Date: Mon, 1 Apr 2024 22:34:54 +0200 Subject: [PATCH 03/54] simpler way --- src/Storages/StorageS3.cpp | 76 +++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf..844f5362ec2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -191,7 +191,7 @@ public: Impl( const S3::Client & client_, const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, + const ActionsDAG::Node * predicate_, const NamesAndTypesList & virtual_columns_, ContextPtr context_, KeysWithInfo * read_keys_, @@ -200,6 +200,7 @@ public: : WithContext(context_) , client(client_.clone()) , globbed_uri(globbed_uri_) + , predicate(predicate_) , virtual_columns(virtual_columns_) , read_keys(read_keys_) , request_settings(request_settings_) @@ -210,32 +211,13 @@ public: if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); + expanded_keys = expandSelectionGlob(globbed_uri.key); + expanded_keys_iter = expanded_keys.begin(); - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == globbed_uri.key.size()) - { - buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); - buffer_iter = buffer.begin(); + bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); + expanded_keys_iter++; + if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) is_finished = true; - return; - } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); - - recursive = globbed_uri.key == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); } KeyWithInfoPtr next(size_t) @@ -257,6 +239,37 @@ public: private: using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; + bool fillBufferForKey(const std::string & uri_key) + { + const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); + + /// We don't have to list bucket, because there is no asterisks. + if (key_prefix.size() == uri_key.size()) + { + buffer.clear(); + buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); + buffer_iter = buffer.begin(); + return true; + } + + request.SetBucket(globbed_uri.bucket); + request.SetPrefix(key_prefix); + request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); + + outcome_future = listObjectsAsync(); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(uri_key)); + if (!matcher->ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", uri_key, matcher->error()); + + recursive = globbed_uri.key == "/**"; + + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + fillInternalBufferAssumeLocked(); + return false; + } + KeyWithInfoPtr nextAssumeLocked() { do @@ -278,6 +291,15 @@ private: return answer; } + if (expanded_keys_iter != expanded_keys.end()) + { + bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); + expanded_keys_iter++; + if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) + is_finished = true; + continue; + } + if (is_finished) return {}; @@ -399,8 +421,12 @@ private: KeysWithInfo buffer; KeysWithInfo::iterator buffer_iter; + std::vector expanded_keys; + std::vector::iterator expanded_keys_iter; + std::unique_ptr client; S3::URI globbed_uri; + const ActionsDAG::Node * predicate; ASTPtr query; NamesAndTypesList virtual_columns; ActionsDAGPtr filter_dag; From a177fbfd8cb52c64f096797a6c65fdc4dfeb828e Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 2 Apr 2024 00:05:53 +0200 Subject: [PATCH 04/54] ignore error when one of selection options not exist --- src/Storages/StorageS3.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 844f5362ec2..09a5ffc86a5 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -283,7 +283,18 @@ private: /// So we get object info lazily here on 'next()' request. 
if (!answer->info) { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); + try + { + answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); + } + catch (...) + { + /// if no such file AND there was no `{}` glob -- this is an exception + /// otherwise ignore it, this is acceptable + if (expanded_keys.size() == 1) + throw; + continue; + } if (file_progress_callback) file_progress_callback(FileProgress(0, answer->info->size)); } From 7232bf45768f56c768ac03ed4b34c085bc6f060a Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 2 Apr 2024 16:12:11 +0200 Subject: [PATCH 05/54] no reuse request --- src/Storages/StorageS3.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 09a5ffc86a5..28bfa3c32a9 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -252,6 +252,7 @@ private: return true; } + request = S3::ListObjectsV2Request{}; request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); From 25cab6f0713221e32b2c2fef844e2c2fde77e985 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 3 Apr 2024 20:57:10 +0200 Subject: [PATCH 06/54] fix schema inference cache (1) --- src/Storages/StorageS3.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 28bfa3c32a9..b19e61762d1 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -217,7 +217,7 @@ public: bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); expanded_keys_iter++; if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) - is_finished = true; + is_finished_for_key = true; } KeyWithInfoPtr next(size_t) @@ -241,6 +241,7 @@ private: bool fillBufferForKey(const std::string & uri_key) { + is_finished_for_key = false; const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); /// We don't have to list bucket, because there is no asterisks. @@ -249,10 +250,12 @@ private: buffer.clear(); buffer.emplace_back(std::make_shared(uri_key, std::nullopt)); buffer_iter = buffer.begin(); + if (read_keys) + read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); return true; } - request = S3::ListObjectsV2Request{}; + request = {}; request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); @@ -308,11 +311,11 @@ private: bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); expanded_keys_iter++; if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) - is_finished = true; + is_finished_for_key = true; continue; } - if (is_finished) + if (is_finished_for_key) return {}; try @@ -327,7 +330,7 @@ private: /// it may take some time for threads to stop processors and they /// may still use this iterator after exception is thrown. /// To avoid this UB, reset the buffer and return defaults for further calls. 
- is_finished = true; + is_finished_for_key = true; buffer.clear(); buffer_iter = buffer.begin(); throw; @@ -351,9 +354,9 @@ private: const auto & result_batch = outcome.GetResult().GetContents(); /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); + is_finished_for_key = !outcome.GetResult().GetIsTruncated(); - if (!is_finished) + if (!is_finished_for_key) { /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. @@ -444,7 +447,7 @@ private: ActionsDAGPtr filter_dag; std::unique_ptr matcher; bool recursive{false}; - bool is_finished{false}; + bool is_finished_for_key{false}; KeysWithInfo * read_keys; S3::ListObjectsV2Request request; From ce3969e25d7ef8bd661fa6047ac0882735fd567a Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 4 Apr 2024 19:47:34 +0000 Subject: [PATCH 07/54] adapt test to new behavior --- tests/integration/test_storage_s3/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 6d5b84a8143..9d275575f8a 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1768,13 +1768,13 @@ def test_schema_inference_cache(started_cluster): check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4 if storage_name == "url" else 1) instance.query("system drop schema cache") check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4 if storage_name == "url" else 1) instance.query("system drop schema cache") From e3858107969d6f6363de343197608bf65693dd59 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 4 Apr 2024 22:18:41 +0200 Subject: [PATCH 08/54] fix black --- tests/integration/test_storage_s3/test.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 9d275575f8a..a4ed94c815b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1768,13 +1768,27 @@ def test_schema_inference_cache(started_cluster): check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4 if storage_name == "url" else 1) + check_cache_misses( + instance, + files, + storage_name, + started_cluster, + bucket, + 4 if storage_name == "url" else 1, + ) instance.query("system drop schema cache") check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4 if storage_name == "url" else 1) + check_cache_misses( + instance, + files, + storage_name, + started_cluster, + bucket, + 4 if storage_name == "url" else 1, + ) instance.query("system drop schema cache") From 3c58e5873b24e0aec20a4c5b97c3ab6bb849c47e Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 9 Apr 2024 19:06:14 +0000 Subject: [PATCH 09/54] fix reading of {} 
with more than 1000 objects under each --- src/Storages/StorageS3.cpp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 85d9b45291c..ffe3213a4bc 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -214,10 +214,8 @@ public: expanded_keys = expandSelectionGlob(globbed_uri.key); expanded_keys_iter = expanded_keys.begin(); - bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); + fillBufferForKey(*expanded_keys_iter); expanded_keys_iter++; - if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) - is_finished_for_key = true; } KeyWithInfoPtr next(size_t) @@ -252,6 +250,7 @@ private: buffer_iter = buffer.begin(); if (read_keys) read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); + is_finished_for_key = true; return true; } @@ -306,17 +305,17 @@ private: return answer; } - if (expanded_keys_iter != expanded_keys.end()) - { - bool no_globs_in_key = fillBufferForKey(*expanded_keys_iter); - expanded_keys_iter++; - if (expanded_keys_iter == expanded_keys.end() && no_globs_in_key) - is_finished_for_key = true; - continue; - } - if (is_finished_for_key) - return {}; + { + if (expanded_keys_iter != expanded_keys.end()) + { + fillBufferForKey(*expanded_keys_iter); + expanded_keys_iter++; + continue; + } + else + return {}; + } try { From 093b71b8585161e91f571d9991e2d351effd10fa Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Tue, 9 Apr 2024 21:01:01 +0000 Subject: [PATCH 10/54] added test for selection globs with many files under --- tests/integration/test_storage_s3/test.py | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index a4ed94c815b..60b0e8792d7 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -678,6 +678,44 @@ def test_s3_glob_scheherazade(started_cluster): assert run_query(instance, query).splitlines() == ["1001\t1001\t1001\t1001"] +# a bit modified version of scheherazade test +# checks e.g. `prefix{1,2}/file*.csv`, where there are more than 1000 files under each of prefix1, prefix2. 
+def test_s3_glob_many_objects_under_selection(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + values = "(1, 1, 1)" + jobs = [] + for file_num in range(1100): + + def create_files(file_num): + for folder_num in range(1, 3): + path = f"folder{folder_num}/file{file_num}.csv" + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + started_cluster.minio_ip, + MINIO_INTERNAL_PORT, + bucket, + path, + table_format, + values, + ) + run_query(instance, query) + + jobs.append(threading.Thread(target=create_files, args=(file_num,))) + jobs[-1].start() + + for job in jobs: + job.join() + + query = "select count(), sum(column1), sum(column2), sum(column3) from s3('http://{}:{}/{}/folder{{1,2}}/file*.csv', 'CSV', '{}')".format( + started_cluster.minio_redirect_host, + started_cluster.minio_redirect_port, + bucket, + table_format, + ) + assert run_query(instance, query).splitlines() == ["2200\t2200\t2200\t2200"] + + def run_s3_mocks(started_cluster): script_dir = os.path.join(os.path.dirname(__file__), "s3_mocks") start_mock_servers( From c9b05eac022254c71323d2715a0dc1a32ae9c2f7 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Wed, 10 Apr 2024 12:02:01 +0000 Subject: [PATCH 11/54] fix test_s3_glob_many_objects_under_selection --- tests/integration/test_storage_s3/test.py | 26 ++++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 60b0e8792d7..28b70911b77 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -678,19 +678,19 @@ def test_s3_glob_scheherazade(started_cluster): assert run_query(instance, query).splitlines() == ["1001\t1001\t1001\t1001"] -# a bit modified version of scheherazade test -# checks e.g. `prefix{1,2}/file*.csv`, where there are more than 1000 files under each of prefix1, prefix2. +# a bit simplified version of scheherazade test +# checks e.g. `prefix{1,2}/file*.csv`, where there are more than 1000 files under prefix1. 
def test_s3_glob_many_objects_under_selection(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] # type: ClickHouseInstance table_format = "column1 UInt32, column2 UInt32, column3 UInt32" values = "(1, 1, 1)" jobs = [] - for file_num in range(1100): + for thread_num in range(16): - def create_files(file_num): - for folder_num in range(1, 3): - path = f"folder{folder_num}/file{file_num}.csv" + def create_files(thread_num): + for f_num in range(thread_num * 63, thread_num * 63 + 63): + path = f"folder1/file{f_num}.csv" query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( started_cluster.minio_ip, MINIO_INTERNAL_PORT, @@ -701,9 +701,19 @@ def test_s3_glob_many_objects_under_selection(started_cluster): ) run_query(instance, query) - jobs.append(threading.Thread(target=create_files, args=(file_num,))) + jobs.append(threading.Thread(target=create_files, args=(thread_num,))) jobs[-1].start() + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + started_cluster.minio_ip, + MINIO_INTERNAL_PORT, + bucket, + f"folder2/file0.csv", + table_format, + values, + ) + run_query(instance, query) + for job in jobs: job.join() @@ -713,7 +723,7 @@ def test_s3_glob_many_objects_under_selection(started_cluster): bucket, table_format, ) - assert run_query(instance, query).splitlines() == ["2200\t2200\t2200\t2200"] + assert run_query(instance, query).splitlines() == ["1009\t1009\t1009\t1009"] def run_s3_mocks(started_cluster): From 0669591e35e0b6f19c148ff941c2497a0e38435c Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Wed, 10 Apr 2024 17:33:48 +0000 Subject: [PATCH 12/54] small code cleanup --- src/Storages/StorageS3.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ffe3213a4bc..acef213c1f4 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -237,7 +237,7 @@ public: private: using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - bool fillBufferForKey(const std::string & uri_key) + void fillBufferForKey(const std::string & uri_key) { is_finished_for_key = false; const String key_prefix = uri_key.substr(0, uri_key.find_first_of("*?{")); @@ -251,7 +251,7 @@ private: if (read_keys) read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); is_finished_for_key = true; - return true; + return; } request = {}; @@ -270,7 +270,7 @@ private: filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); fillInternalBufferAssumeLocked(); - return false; + return; } KeyWithInfoPtr nextAssumeLocked() From c9a31599c08f7281acff09b86aa68d7da345efdc Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Wed, 24 Apr 2024 22:14:47 +0000 Subject: [PATCH 13/54] fix single-threading failsafe when number of files cannot be estimated --- src/Storages/StorageS3.cpp | 25 +++++++++++++++++++++++-- src/Storages/StorageS3.h | 1 + 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index acef213c1f4..daab457e46b 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -229,6 +229,14 @@ public: return buffer.size(); } + bool hasMore() + { + if (!buffer.size()) + return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); + else + return true; + } + ~Impl() { list_objects_pool.wait(); @@ -481,6 +489,11 @@ size_t 
StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() return pimpl->objectsCount(); } +bool StorageS3Source::DisclosedGlobIterator::hasMore() +{ + return pimpl->hasMore(); +} + class StorageS3Source::KeysIterator::Impl { public: @@ -1243,8 +1256,16 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, if (estimated_keys_count > 1) num_streams = std::min(num_streams, estimated_keys_count); else - /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. - num_streams = 1; + { + const auto glob_iter = std::dynamic_pointer_cast(iterator_wrapper); + if (!(glob_iter && glob_iter->hasMore())) + { + /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. + num_streams = 1; + } + /// Otherwise, 1000 files were already listed, but none of them is actually what we are looking for. + /// We cannot estimate _how many_ there are left, but if there are more files to list, it's faster to do it in many streams. + } const size_t max_threads = context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 19cbfaa6f08..8d21f1d8e8e 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -83,6 +83,7 @@ public: KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT size_t estimatedKeysCount() override; + bool hasMore(); private: class Impl; From 686ea6af9c3512c7b07345cabc785bb975311162 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 25 Apr 2024 09:06:49 +0000 Subject: [PATCH 14/54] fix style and logic of estimation --- src/Storages/StorageS3.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 6ba41d21766..bdfd2b8b453 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -231,7 +231,7 @@ public: bool hasMore() { - if (!buffer.size()) + if (buffer.empty()) return !(expanded_keys_iter == expanded_keys.end() && is_finished_for_key); else return true; @@ -1286,19 +1286,21 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, createIterator(nullptr); size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else + const auto glob_iter = std::dynamic_pointer_cast(iterator_wrapper); + + if (!(glob_iter && glob_iter->hasMore())) { - const auto glob_iter = std::dynamic_pointer_cast(iterator_wrapper); - if (!(glob_iter && glob_iter->hasMore())) + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else { - /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. + /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. num_streams = 1; } - /// Otherwise, 1000 files were already listed, but none of them is actually what we are looking for. - /// We cannot estimate _how many_ there are left, but if there are more files to list, it's faster to do it in many streams. 
} + /// OTHERWISE, 1000 files were listed, but we cannot make any estimation of _how many_ there are (because we list bucket lazily); + /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do + /// as it would lead to serious (up to times) reading performance degradation. const size_t max_threads = context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); From b13c7d004c6f533a1931eb8ac5c529ca82914cd9 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Thu, 25 Apr 2024 14:51:44 +0000 Subject: [PATCH 15/54] fix tidy --- src/Storages/StorageS3.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index bdfd2b8b453..cb5734cfe0c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -278,7 +278,6 @@ private: filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); fillInternalBufferAssumeLocked(); - return; } KeyWithInfoPtr nextAssumeLocked() From e09530ab755964b6da12718279ef345bf2800d43 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 8 Dec 2023 16:51:35 +0100 Subject: [PATCH 16/54] Fix making backup when multiple shards are used. --- src/Backups/BackupCoordinationLocal.cpp | 24 ++++---- src/Backups/BackupCoordinationLocal.h | 12 ++-- src/Backups/BackupCoordinationRemote.cpp | 48 ++++++++-------- src/Backups/BackupCoordinationRemote.h | 12 ++-- .../BackupCoordinationReplicatedTables.cpp | 24 ++++---- .../BackupCoordinationReplicatedTables.h | 14 ++--- src/Backups/BackupEntriesCollector.cpp | 10 ++-- src/Backups/BackupEntriesCollector.h | 2 +- src/Backups/BackupUtils.cpp | 2 +- src/Backups/DDLAdjustingForBackupVisitor.cpp | 10 +--- src/Backups/DDLAdjustingForBackupVisitor.h | 5 +- src/Backups/IBackupCoordination.h | 12 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 56 +++++-------------- src/Storages/StorageReplicatedMergeTree.h | 7 +-- 14 files changed, 100 insertions(+), 138 deletions(-) diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 9964de2ad6e..efdc18cc29c 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -33,42 +33,42 @@ Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milli return {}; } -void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) +void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) { std::lock_guard lock{replicated_tables_mutex}; - replicated_tables.addPartNames({table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums}); + replicated_tables.addPartNames({table_zk_path, table_name_for_logs, replica_name, part_names_and_checksums}); } -Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const +Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const { std::lock_guard lock{replicated_tables_mutex}; - return replicated_tables.getPartNames(table_shared_id, replica_name); + return replicated_tables.getPartNames(table_zk_path, replica_name); } -void 
BackupCoordinationLocal::addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) +void BackupCoordinationLocal::addReplicatedMutations(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) { std::lock_guard lock{replicated_tables_mutex}; - replicated_tables.addMutations({table_shared_id, table_name_for_logs, replica_name, mutations}); + replicated_tables.addMutations({table_zk_path, table_name_for_logs, replica_name, mutations}); } -std::vector BackupCoordinationLocal::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const +std::vector BackupCoordinationLocal::getReplicatedMutations(const String & table_zk_path, const String & replica_name) const { std::lock_guard lock{replicated_tables_mutex}; - return replicated_tables.getMutations(table_shared_id, replica_name); + return replicated_tables.getMutations(table_zk_path, replica_name); } -void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path) +void BackupCoordinationLocal::addReplicatedDataPath(const String & table_zk_path, const String & data_path) { std::lock_guard lock{replicated_tables_mutex}; - replicated_tables.addDataPath({table_shared_id, data_path}); + replicated_tables.addDataPath({table_zk_path, data_path}); } -Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const +Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_zk_path) const { std::lock_guard lock{replicated_tables_mutex}; - return replicated_tables.getDataPaths(table_shared_id); + return replicated_tables.getDataPaths(table_zk_path); } diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index e0aa5dc67a4..a7f15c79649 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -29,16 +29,16 @@ public: Strings waitForStage(const String & stage_to_wait) override; Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; - void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, + void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; - void addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, + void addReplicatedMutations(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) override; - std::vector getReplicatedMutations(const String & table_shared_id, const String & replica_name) const override; + std::vector getReplicatedMutations(const String & table_zk_path, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_shared_id) const override; + void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; + Strings 
getReplicatedDataPaths(const String & table_zk_path) const override; void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override; Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override; diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 455f45a7a77..f353062f628 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -358,7 +358,7 @@ String BackupCoordinationRemote::deserializeFromMultipleZooKeeperNodes(const Str void BackupCoordinationRemote::addReplicatedPartNames( - const String & table_shared_id, + const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) @@ -374,22 +374,22 @@ void BackupCoordinationRemote::addReplicatedPartNames( [&, &zk = holder.faulty_zookeeper]() { with_retries.renewZooKeeper(zk); - String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id); + String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_zk_path); zk->createIfNotExists(path, ""); path += "/" + escapeForFileName(replica_name); zk->createIfNotExists(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs)); }); } -Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const +Strings BackupCoordinationRemote::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const { std::lock_guard lock{replicated_tables_mutex}; prepareReplicatedTables(); - return replicated_tables->getPartNames(table_shared_id, replica_name); + return replicated_tables->getPartNames(table_zk_path, replica_name); } void BackupCoordinationRemote::addReplicatedMutations( - const String & table_shared_id, + const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) @@ -405,23 +405,23 @@ void BackupCoordinationRemote::addReplicatedMutations( [&, &zk = holder.faulty_zookeeper]() { with_retries.renewZooKeeper(zk); - String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_shared_id); + String path = zookeeper_path + "/repl_mutations/" + escapeForFileName(table_zk_path); zk->createIfNotExists(path, ""); path += "/" + escapeForFileName(replica_name); zk->createIfNotExists(path, ReplicatedMutations::serialize(mutations, table_name_for_logs)); }); } -std::vector BackupCoordinationRemote::getReplicatedMutations(const String & table_shared_id, const String & replica_name) const +std::vector BackupCoordinationRemote::getReplicatedMutations(const String & table_zk_path, const String & replica_name) const { std::lock_guard lock{replicated_tables_mutex}; prepareReplicatedTables(); - return replicated_tables->getMutations(table_shared_id, replica_name); + return replicated_tables->getMutations(table_zk_path, replica_name); } void BackupCoordinationRemote::addReplicatedDataPath( - const String & table_shared_id, const String & data_path) + const String & table_zk_path, const String & data_path) { { std::lock_guard lock{replicated_tables_mutex}; @@ -434,18 +434,18 @@ void BackupCoordinationRemote::addReplicatedDataPath( [&, &zk = holder.faulty_zookeeper]() { with_retries.renewZooKeeper(zk); - String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id); + String 
path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path); zk->createIfNotExists(path, ""); path += "/" + escapeForFileName(data_path); zk->createIfNotExists(path, ""); }); } -Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_shared_id) const +Strings BackupCoordinationRemote::getReplicatedDataPaths(const String & table_zk_path) const { std::lock_guard lock{replicated_tables_mutex}; prepareReplicatedTables(); - return replicated_tables->getDataPaths(table_shared_id); + return replicated_tables->getDataPaths(table_zk_path); } @@ -464,16 +464,16 @@ void BackupCoordinationRemote::prepareReplicatedTables() const with_retries.renewZooKeeper(zk); String path = zookeeper_path + "/repl_part_names"; - for (const String & escaped_table_shared_id : zk->getChildren(path)) + for (const String & escaped_table_zk_path : zk->getChildren(path)) { - String table_shared_id = unescapeForFileName(escaped_table_shared_id); - String path2 = path + "/" + escaped_table_shared_id; + String table_zk_path = unescapeForFileName(escaped_table_zk_path); + String path2 = path + "/" + escaped_table_zk_path; for (const String & escaped_replica_name : zk->getChildren(path2)) { String replica_name = unescapeForFileName(escaped_replica_name); auto part_names = ReplicatedPartNames::deserialize(zk->get(path2 + "/" + escaped_replica_name)); part_names_for_replicated_tables.push_back( - {table_shared_id, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums}); + {table_zk_path, part_names.table_name_for_logs, replica_name, part_names.part_names_and_checksums}); } } }); @@ -489,16 +489,16 @@ void BackupCoordinationRemote::prepareReplicatedTables() const with_retries.renewZooKeeper(zk); String path = zookeeper_path + "/repl_mutations"; - for (const String & escaped_table_shared_id : zk->getChildren(path)) + for (const String & escaped_table_zk_path : zk->getChildren(path)) { - String table_shared_id = unescapeForFileName(escaped_table_shared_id); - String path2 = path + "/" + escaped_table_shared_id; + String table_zk_path = unescapeForFileName(escaped_table_zk_path); + String path2 = path + "/" + escaped_table_zk_path; for (const String & escaped_replica_name : zk->getChildren(path2)) { String replica_name = unescapeForFileName(escaped_replica_name); auto mutations = ReplicatedMutations::deserialize(zk->get(path2 + "/" + escaped_replica_name)); mutations_for_replicated_tables.push_back( - {table_shared_id, mutations.table_name_for_logs, replica_name, mutations.mutations}); + {table_zk_path, mutations.table_name_for_logs, replica_name, mutations.mutations}); } } }); @@ -514,14 +514,14 @@ void BackupCoordinationRemote::prepareReplicatedTables() const with_retries.renewZooKeeper(zk); String path = zookeeper_path + "/repl_data_paths"; - for (const String & escaped_table_shared_id : zk->getChildren(path)) + for (const String & escaped_table_zk_path : zk->getChildren(path)) { - String table_shared_id = unescapeForFileName(escaped_table_shared_id); - String path2 = path + "/" + escaped_table_shared_id; + String table_zk_path = unescapeForFileName(escaped_table_zk_path); + String path2 = path + "/" + escaped_table_zk_path; for (const String & escaped_data_path : zk->getChildren(path2)) { String data_path = unescapeForFileName(escaped_data_path); - data_paths_for_replicated_tables.push_back({table_shared_id, data_path}); + data_paths_for_replicated_tables.push_back({table_zk_path, data_path}); } } }); diff --git a/src/Backups/BackupCoordinationRemote.h 
b/src/Backups/BackupCoordinationRemote.h index ce891699bd2..7a56b1a4eb8 100644 --- a/src/Backups/BackupCoordinationRemote.h +++ b/src/Backups/BackupCoordinationRemote.h @@ -41,23 +41,23 @@ public: Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override; void addReplicatedPartNames( - const String & table_shared_id, + const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; void addReplicatedMutations( - const String & table_shared_id, + const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) override; - std::vector getReplicatedMutations(const String & table_shared_id, const String & replica_name) const override; + std::vector getReplicatedMutations(const String & table_zk_path, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_shared_id) const override; + void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; + Strings getReplicatedDataPaths(const String & table_zk_path) const override; void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override; Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override; diff --git a/src/Backups/BackupCoordinationReplicatedTables.cpp b/src/Backups/BackupCoordinationReplicatedTables.cpp index 1cbb88acb82..a435667f79a 100644 --- a/src/Backups/BackupCoordinationReplicatedTables.cpp +++ b/src/Backups/BackupCoordinationReplicatedTables.cpp @@ -151,7 +151,7 @@ BackupCoordinationReplicatedTables::~BackupCoordinationReplicatedTables() = defa void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica && part_names) { - const auto & table_shared_id = part_names.table_shared_id; + const auto & table_zk_path = part_names.table_zk_path; const auto & table_name_for_logs = part_names.table_name_for_logs; const auto & replica_name = part_names.replica_name; const auto & part_names_and_checksums = part_names.part_names_and_checksums; @@ -159,7 +159,7 @@ void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica & if (prepared) throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after preparing"); - auto & table_info = table_infos[table_shared_id]; + auto & table_info = table_infos[table_zk_path]; table_info.table_name_for_logs = table_name_for_logs; if (!table_info.covered_parts_finder) @@ -200,11 +200,11 @@ void BackupCoordinationReplicatedTables::addPartNames(PartNamesForTableReplica & } } -Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_shared_id, const String & replica_name) const +Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_zk_path, const String & replica_name) const { prepare(); - auto it = table_infos.find(table_shared_id); + auto it = table_infos.find(table_zk_path); if (it == table_infos.end()) return {}; @@ -218,7 +218,7 @@ Strings BackupCoordinationReplicatedTables::getPartNames(const String & table_sh 
void BackupCoordinationReplicatedTables::addMutations(MutationsForTableReplica && mutations_for_table_replica) { - const auto & table_shared_id = mutations_for_table_replica.table_shared_id; + const auto & table_zk_path = mutations_for_table_replica.table_zk_path; const auto & table_name_for_logs = mutations_for_table_replica.table_name_for_logs; const auto & replica_name = mutations_for_table_replica.replica_name; const auto & mutations = mutations_for_table_replica.mutations; @@ -226,7 +226,7 @@ void BackupCoordinationReplicatedTables::addMutations(MutationsForTableReplica & if (prepared) throw Exception(ErrorCodes::LOGICAL_ERROR, "addMutations() must not be called after preparing"); - auto & table_info = table_infos[table_shared_id]; + auto & table_info = table_infos[table_zk_path]; table_info.table_name_for_logs = table_name_for_logs; for (const auto & [mutation_id, mutation_entry] : mutations) table_info.mutations.emplace(mutation_id, mutation_entry); @@ -236,11 +236,11 @@ void BackupCoordinationReplicatedTables::addMutations(MutationsForTableReplica & } std::vector -BackupCoordinationReplicatedTables::getMutations(const String & table_shared_id, const String & replica_name) const +BackupCoordinationReplicatedTables::getMutations(const String & table_zk_path, const String & replica_name) const { prepare(); - auto it = table_infos.find(table_shared_id); + auto it = table_infos.find(table_zk_path); if (it == table_infos.end()) return {}; @@ -257,16 +257,16 @@ BackupCoordinationReplicatedTables::getMutations(const String & table_shared_id, void BackupCoordinationReplicatedTables::addDataPath(DataPathForTableReplica && data_path_for_table_replica) { - const auto & table_shared_id = data_path_for_table_replica.table_shared_id; + const auto & table_zk_path = data_path_for_table_replica.table_zk_path; const auto & data_path = data_path_for_table_replica.data_path; - auto & table_info = table_infos[table_shared_id]; + auto & table_info = table_infos[table_zk_path]; table_info.data_paths.emplace(data_path); } -Strings BackupCoordinationReplicatedTables::getDataPaths(const String & table_shared_id) const +Strings BackupCoordinationReplicatedTables::getDataPaths(const String & table_zk_path) const { - auto it = table_infos.find(table_shared_id); + auto it = table_infos.find(table_zk_path); if (it == table_infos.end()) return {}; diff --git a/src/Backups/BackupCoordinationReplicatedTables.h b/src/Backups/BackupCoordinationReplicatedTables.h index 74f21eb9c7c..50ab56aef75 100644 --- a/src/Backups/BackupCoordinationReplicatedTables.h +++ b/src/Backups/BackupCoordinationReplicatedTables.h @@ -40,7 +40,7 @@ public: struct PartNamesForTableReplica { - String table_shared_id; + String table_zk_path; String table_name_for_logs; String replica_name; std::vector part_names_and_checksums; @@ -55,13 +55,13 @@ public: /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addPartNames() but without duplications and without /// parts covered by another parts. 
- Strings getPartNames(const String & table_shared_id, const String & replica_name) const; + Strings getPartNames(const String & table_zk_path, const String & replica_name) const; using MutationInfo = IBackupCoordination::MutationInfo; struct MutationsForTableReplica { - String table_shared_id; + String table_zk_path; String table_name_for_logs; String replica_name; std::vector mutations; @@ -71,11 +71,11 @@ public: void addMutations(MutationsForTableReplica && mutations_for_table_replica); /// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames(). - std::vector getMutations(const String & table_shared_id, const String & replica_name) const; + std::vector getMutations(const String & table_zk_path, const String & replica_name) const; struct DataPathForTableReplica { - String table_shared_id; + String table_zk_path; String data_path; }; @@ -85,7 +85,7 @@ public: void addDataPath(DataPathForTableReplica && data_path_for_table_replica); /// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()). - Strings getDataPaths(const String & table_shared_id) const; + Strings getDataPaths(const String & table_zk_path) const; private: void prepare() const; @@ -110,7 +110,7 @@ private: std::unordered_set data_paths; }; - std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. + std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. mutable bool prepared = false; }; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index cc014c279cc..136e3c49321 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -758,7 +759,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() checkIsQueryCancelled(); ASTPtr new_create_query = database_info.create_database_query; - adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), nullptr); + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext()); renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); const String & metadata_path_in_backup = database_info.metadata_path_in_backup; @@ -775,7 +776,8 @@ void BackupEntriesCollector::makeBackupEntriesForTablesDefs() checkIsQueryCancelled(); ASTPtr new_create_query = table_info.create_table_query; - adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), &table_info.replicated_table_shared_id); + table_info.replicated_table_zk_path = tryExtractZkPathFromCreateQuery(*new_create_query, context->getGlobalContext()); + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext()); renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); const String & metadata_path_in_backup = table_info.metadata_path_in_backup; @@ -814,8 +816,8 @@ void BackupEntriesCollector::makeBackupEntriesForTableData(const QualifiedTableN /// If this table is replicated in this case we call IBackupCoordination::addReplicatedDataPath() which will cause /// other replicas to fill the storage's data in the backup. /// If this table is not replicated we'll do nothing leaving the storage's data empty in the backup. 
- if (table_info.replicated_table_shared_id) - backup_coordination->addReplicatedDataPath(*table_info.replicated_table_shared_id, data_path_in_backup); + if (table_info.replicated_table_zk_path) + backup_coordination->addReplicatedDataPath(*table_info.replicated_table_zk_path, data_path_in_backup); return; } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 01e8d594334..c7bce077a2d 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -164,7 +164,7 @@ private: ASTPtr create_table_query; String metadata_path_in_backup; std::filesystem::path data_path_in_backup; - std::optional replicated_table_shared_id; + std::optional replicated_table_zk_path; std::optional partitions; }; diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index fb448fb64ad..fa8ed5855dd 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -103,7 +103,7 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS auto adjust_before_comparison = [&](const IAST & query) -> ASTPtr { auto new_query = query.clone(); - adjustCreateQueryForBackup(new_query, global_context, nullptr); + adjustCreateQueryForBackup(new_query, global_context); ASTCreateQuery & create = typeid_cast(*new_query); create.setUUID({}); create.if_not_exists = false; diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 5ea91094b75..7e5ce91629b 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -27,9 +27,6 @@ namespace { /// Precondition: engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree") - if (data.replicated_table_shared_id) - *data.replicated_table_shared_id = StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(*data.create_query, data.global_context); - /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. @@ -98,12 +95,9 @@ void DDLAdjustingForBackupVisitor::visit(ASTPtr ast, const Data & data) visitCreateQuery(*create, data); } -void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id) +void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context) { - if (replicated_table_shared_id) - *replicated_table_shared_id = {}; - - DDLAdjustingForBackupVisitor::Data data{ast, global_context, replicated_table_shared_id}; + DDLAdjustingForBackupVisitor::Data data{ast, global_context}; DDLAdjustingForBackupVisitor::Visitor{data}.visit(ast); } diff --git a/src/Backups/DDLAdjustingForBackupVisitor.h b/src/Backups/DDLAdjustingForBackupVisitor.h index 63353dcc000..f0508434e02 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.h +++ b/src/Backups/DDLAdjustingForBackupVisitor.h @@ -12,9 +12,7 @@ class Context; using ContextPtr = std::shared_ptr; /// Changes a create query to a form which is appropriate or suitable for saving in a backup. -/// Also extracts a replicated table's shared ID from the create query if this is a create query for a replicated table. -/// `replicated_table_shared_id` can be null if you don't need that. 
-void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id); +void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context); /// Visits ASTCreateQuery and changes it to a form which is appropriate or suitable for saving in a backup. class DDLAdjustingForBackupVisitor @@ -24,7 +22,6 @@ public: { ASTPtr create_query; ContextPtr global_context; - std::optional * replicated_table_shared_id = nullptr; }; using Visitor = InDepthNodeVisitor; diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index f80b5dee883..4a9f8a23855 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -36,13 +36,13 @@ public: /// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function /// getReplicatedPartNames(). /// Checksums are used only to control that parts under the same names on different replicas are the same. - virtual void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, + virtual void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) = 0; /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addReplicatedPartNames() but without duplications and without /// parts covered by another parts. - virtual Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const = 0; + virtual Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const = 0; struct MutationInfo { @@ -51,10 +51,10 @@ public: }; /// Adds information about mutations of a replicated table. - virtual void addReplicatedMutations(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) = 0; + virtual void addReplicatedMutations(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & mutations) = 0; /// Returns all mutations of a replicated table which are not finished for some data parts added by addReplicatedPartNames(). - virtual std::vector getReplicatedMutations(const String & table_shared_id, const String & replica_name) const = 0; + virtual std::vector getReplicatedMutations(const String & table_zk_path, const String & replica_name) const = 0; /// Adds information about KeeperMap tables virtual void addKeeperMapTable(const String & table_zookeeper_root_path, const String & table_id, const String & data_path_in_backup) = 0; @@ -65,10 +65,10 @@ public: /// Adds a data path in backup for a replicated table. /// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function /// getReplicatedDataPaths(). - virtual void addReplicatedDataPath(const String & table_shared_id, const String & data_path) = 0; + virtual void addReplicatedDataPath(const String & table_zk_path, const String & data_path) = 0; /// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()). 
- virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0; + virtual Strings getReplicatedDataPaths(const String & table_zk_path) const = 0; /// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage. virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) = 0; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c425035dfba..58d1846915f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -187,7 +187,6 @@ namespace ErrorCodes extern const int NOT_INITIALIZED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; extern const int TABLE_IS_DROPPED; - extern const int CANNOT_BACKUP_TABLE; extern const int SUPPORT_IS_DISABLED; extern const int FAULT_INJECTED; extern const int CANNOT_FORGET_PARTITION; @@ -310,8 +309,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( true, /// require_part_metadata mode, [this] (const std::string & name) { enqueuePartForCheck(name); }) - , zookeeper_name(zkutil::extractZooKeeperName(zookeeper_path_)) - , zookeeper_path(zkutil::extractZooKeeperPath(zookeeper_path_, /* check_starts_with_slash */ mode <= LoadingStrictnessLevel::CREATE, log.load())) + , full_zookeeper_path(zookeeper_path_) + , zookeeper_name(zkutil::extractZooKeeperName(full_zookeeper_path)) + , zookeeper_path(zkutil::extractZooKeeperPath(full_zookeeper_path, /* check_starts_with_slash */ mode <= LoadingStrictnessLevel::CREATE, log.load())) , replica_name(replica_name_) , replica_path(fs::path(zookeeper_path) / "replicas" / replica_name_) , reader(*this) @@ -9242,24 +9242,6 @@ void StorageReplicatedMergeTree::createTableSharedID() const } -std::optional StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) -{ - auto zk_path = tryExtractZkPathFromCreateQuery(create_query, global_context); - if (!zk_path) - return {}; - - String zk_name = zkutil::extractZooKeeperName(*zk_path); - zk_path = zkutil::extractZooKeeperPath(*zk_path, false, nullptr); - zkutil::ZooKeeperPtr zookeeper = (zk_name == getDefaultZooKeeperName()) ? global_context->getZooKeeper() : global_context->getAuxiliaryZooKeeper(zk_name); - - String id; - if (!zookeeper->tryGet(fs::path(*zk_path) / "table_shared_id", id)) - return {}; - - return id; -} - - zkutil::EphemeralNodeHolderPtr StorageReplicatedMergeTree::lockSharedDataTemporary(const String & part_name, const String & part_id, const DiskPtr & disk) const { auto settings = getSettings(); @@ -10419,21 +10401,10 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, current_metadata).checkAndFindDiff(metadata_from_entry, current_metadata->getColumns(), getContext()); auto adjusted_metadata = metadata_diff.getNewMetadata(columns_from_entry, getContext(), *current_metadata); applyMetadataChangesToCreateQuery(create_query, adjusted_metadata); - - /// Check that tryGetTableSharedIDFromCreateQuery() works for this storage. 
- auto actual_table_shared_id = getTableSharedID(); - auto expected_table_shared_id = tryGetTableSharedIDFromCreateQuery(*create_query, getContext()); - if (actual_table_shared_id != expected_table_shared_id) - { - throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Table {} has its shared ID different from one from the create query: " - "actual shared id = {}, expected shared id = {}, create query = {}", - getStorageID().getNameForLogs(), actual_table_shared_id, expected_table_shared_id.value_or("nullopt"), - create_query); - } } catch (...) { - /// We can continue making a backup with non-adjusted name. + /// We can continue making a backup with non-adjusted query. tryLogCurrentException(log, "Failed to adjust the create query of this table for backup"); } } @@ -10459,8 +10430,8 @@ void StorageReplicatedMergeTree::backupData( auto parts_backup_entries = backupParts(data_parts, /* data_path_in_backup */ "", backup_settings, read_settings, local_context); auto coordination = backup_entries_collector.getBackupCoordination(); - String shared_id = getTableSharedID(); - coordination->addReplicatedDataPath(shared_id, data_path_in_backup); + + coordination->addReplicatedDataPath(full_zookeeper_path, data_path_in_backup); using PartNameAndChecksum = IBackupCoordination::PartNameAndChecksum; std::vector part_names_with_hashes; @@ -10469,7 +10440,7 @@ void StorageReplicatedMergeTree::backupData( part_names_with_hashes.emplace_back(PartNameAndChecksum{part_backup_entries.part_name, part_backup_entries.part_checksum}); /// Send our list of part names to the coordination (to compare with other replicas). - coordination->addReplicatedPartNames(shared_id, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); + coordination->addReplicatedPartNames(full_zookeeper_path, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); /// Send a list of mutations to the coordination too (we need to find the mutations which are not finished for added part names). { @@ -10511,25 +10482,25 @@ void StorageReplicatedMergeTree::backupData( } if (!mutation_infos.empty()) - coordination->addReplicatedMutations(shared_id, getStorageID().getFullTableName(), getReplicaName(), mutation_infos); + coordination->addReplicatedMutations(full_zookeeper_path, getStorageID().getFullTableName(), getReplicaName(), mutation_infos); } } /// This task will be executed after all replicas have collected their parts and the coordination is ready to /// give us the final list of parts to add to the BackupEntriesCollector. 
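    /// The coordination deduplicates part names across replicas, so getReplicatedPartNames() below returns only the parts
    /// this replica is responsible for writing into the backup.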
- auto post_collecting_task = [shared_id, + auto post_collecting_task = [my_full_zookeeper_path = full_zookeeper_path, my_replica_name = getReplicaName(), coordination, my_parts_backup_entries = std::move(parts_backup_entries), &backup_entries_collector]() { - Strings data_paths = coordination->getReplicatedDataPaths(shared_id); + Strings data_paths = coordination->getReplicatedDataPaths(my_full_zookeeper_path); std::vector data_paths_fs; data_paths_fs.reserve(data_paths.size()); for (const auto & data_path : data_paths) data_paths_fs.push_back(data_path); - Strings part_names = coordination->getReplicatedPartNames(shared_id, my_replica_name); + Strings part_names = coordination->getReplicatedPartNames(my_full_zookeeper_path, my_replica_name); std::unordered_set part_names_set{part_names.begin(), part_names.end()}; for (const auto & part_backup_entries : my_parts_backup_entries) @@ -10542,7 +10513,7 @@ void StorageReplicatedMergeTree::backupData( } } - auto mutation_infos = coordination->getReplicatedMutations(shared_id, my_replica_name); + auto mutation_infos = coordination->getReplicatedMutations(my_full_zookeeper_path, my_replica_name); for (const auto & mutation_info : mutation_infos) { auto backup_entry = ReplicatedMergeTreeMutationEntry::parse(mutation_info.entry, mutation_info.id).backup(); @@ -10556,8 +10527,7 @@ void StorageReplicatedMergeTree::backupData( void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) { - String full_zk_path = getZooKeeperName() + getZooKeeperPath(); - if (!restorer.getRestoreCoordination()->acquireInsertingDataIntoReplicatedTable(full_zk_path)) + if (!restorer.getRestoreCoordination()->acquireInsertingDataIntoReplicatedTable(full_zookeeper_path)) { /// Other replica is already restoring the data of this table. /// We'll get them later due to replication, it's not necessary to read it from the backup. diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c472c11e7f8..7f33c82e5c2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -330,17 +330,14 @@ public: // Return default or custom zookeeper name for table const String & getZooKeeperName() const { return zookeeper_name; } - const String & getZooKeeperPath() const { return zookeeper_path; } + const String & getFullZooKeeperPath() const { return full_zookeeper_path; } // Return table id, common for different replicas String getTableSharedID() const override; std::map getUnfinishedMutationCommands() const override; - /// Returns the same as getTableSharedID(), but extracts it from a create query. - static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - static const String & getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. @@ -420,9 +417,11 @@ private: bool is_readonly_metric_set = false; + const String full_zookeeper_path; static const String default_zookeeper_name; const String zookeeper_name; const String zookeeper_path; + const String replica_name; const String replica_path; From 6e579312633f2c0abb8784f122bfc75559a5d05a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 25 Apr 2024 19:24:36 +0200 Subject: [PATCH 17/54] Get rid of code duplication in extractZkPathFromCreateQuery(). 
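
The helper tryExtractZkPathFromCreateQuery() duplicated the macro-expansion
logic that registerStorageMergeTree.cpp already implements for CREATE TABLE.
This patch removes src/Storages/MergeTree/extractZkPathFromCreateQuery.{h,cpp}
and introduces extractZooKeeperPathFromReplicatedTableDef(), declared in
extractZooKeeperPathFromReplicatedTableDef.h and implemented in
registerStorageMergeTree.cpp on top of the shared
extractZooKeeperPathAndReplicaNameFromEngineArgs() routine, so the backup code
and the table-creation code can no longer diverge. The new call site in
BackupEntriesCollector.cpp boils down to the following simplified sketch
(`create_query` stands for the table's ASTCreateQuery and `context` for the
query context; the exact code is in the hunk below):

    table_info.replicated_table_zk_path
        = extractZooKeeperPathFromReplicatedTableDef(create_query, context);
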
--- src/Backups/BackupEntriesCollector.cpp | 4 +- .../extractZkPathFromCreateQuery.cpp | 61 --- .../MergeTree/extractZkPathFromCreateQuery.h | 19 - ...tractZooKeeperPathFromReplicatedTableDef.h | 18 + .../MergeTree/registerStorageMergeTree.cpp | 401 +++++++++++------- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 6 files changed, 272 insertions(+), 232 deletions(-) delete mode 100644 src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp delete mode 100644 src/Storages/MergeTree/extractZkPathFromCreateQuery.h create mode 100644 src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 136e3c49321..d91cf47c4d3 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -776,7 +776,7 @@ void BackupEntriesCollector::makeBackupEntriesForTablesDefs() checkIsQueryCancelled(); ASTPtr new_create_query = table_info.create_table_query; - table_info.replicated_table_zk_path = tryExtractZkPathFromCreateQuery(*new_create_query, context->getGlobalContext()); + table_info.replicated_table_zk_path = extractZooKeeperPathFromReplicatedTableDef(new_create_query->as(), context); adjustCreateQueryForBackup(new_create_query, context->getGlobalContext()); renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp deleted file mode 100644 index 8ea732b0243..00000000000 --- a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) -{ - const auto * create = create_query.as(); - if (!create || !create->storage || !create->storage->engine) - return {}; - - /// Check if the table engine is one of the ReplicatedMergeTree family. - const auto & ast_engine = *create->storage->engine; - if (!ast_engine.name.starts_with("Replicated") || !ast_engine.name.ends_with("MergeTree")) - return {}; - - /// Get the first argument. - const auto * ast_arguments = typeid_cast(ast_engine.arguments.get()); - if (!ast_arguments || ast_arguments->children.empty()) - return {}; - - auto * ast_zk_path = typeid_cast(ast_arguments->children[0].get()); - if (!ast_zk_path || (ast_zk_path->value.getType() != Field::Types::String)) - return {}; - - String zk_path = ast_zk_path->value.safeGet(); - - /// Expand macros. - Macros::MacroExpansionInfo info; - info.table_id.table_name = create->getTable(); - info.table_id.database_name = create->getDatabase(); - info.table_id.uuid = create->uuid; - auto database = DatabaseCatalog::instance().tryGetDatabase(info.table_id.database_name); - if (database && database->getEngineName() == "Replicated") - { - info.shard = getReplicatedDatabaseShardName(database); - info.replica = getReplicatedDatabaseReplicaName(database); - } - - try - { - zk_path = global_context->getMacros()->expand(zk_path, info); - } - catch (...) - { - return {}; /// Couldn't expand macros. 
- } - - return zk_path; -} - -} diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h deleted file mode 100644 index e22f76d2cd5..00000000000 --- a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ -class IAST; -class Context; -using ContextPtr = std::shared_ptr; - -/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. -/// The function takes the first argument of the ReplicatedMergeTree table engine and expands macros in it. -/// It works like a part of what the create() function in registerStorageMergeTree.cpp does but in a simpler manner. -std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); - -} diff --git a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h new file mode 100644 index 00000000000..1bd58392201 --- /dev/null +++ b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class ASTCreateQuery; +class Context; +using ContextPtr = std::shared_ptr; + +/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. +/// The function checks the table engine and if it is Replicated*MergeTree then it takes the first argument and expands macros in it. +std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & create_query, const ContextPtr & context); + +} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d552a4b6fa5..9b0200d5a1c 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -122,6 +123,248 @@ static void verifySortingKey(const KeyDescription & sorting_key) } } +/// Returns whether a new syntax is used to define a table engine, i.e. MergeTree() PRIMARY KEY ... PARTITION BY ... SETTINGS ... +/// instead of MergeTree(MergeTree(date, [sample_key], primary_key). +static bool isExtendedStorageDef(const ASTCreateQuery & query) +{ + if (query.storage && query.storage->isExtendedStorageDefinition()) + return true; + + if (query.columns_list && + ((query.columns_list->indices && !query.columns_list->indices->children.empty()) || + (query.columns_list->projections && !query.columns_list->projections->children.empty()))) + { + return true; + } + + return false; +} + +/// Evaluates expressions in engine arguments. +/// In new syntax an argument can be literal or identifier or array/tuple of identifiers. +static void evaluateEngineArgs(ASTs & engine_args, const ContextPtr & context) +{ + size_t arg_idx = 0; + try + { + for (; arg_idx < engine_args.size(); ++arg_idx) + { + auto & arg = engine_args[arg_idx]; + auto * arg_func = arg->as(); + if (!arg_func) + continue; + + /// If we got ASTFunction, let's evaluate it and replace with ASTLiteral. + /// Do not try evaluate array or tuple, because it's array or tuple of column identifiers. 
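+            /// (e.g. SummingMergeTree((x, y)) takes a tuple of column identifiers to sum, which must be left as-is)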
+ if (arg_func->name == "array" || arg_func->name == "tuple") + continue; + Field value = evaluateConstantExpression(arg, context).first; + arg = std::make_shared(value); + } + } + catch (Exception & e) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot evaluate engine argument {}: {} {}", + arg_idx, e.message(), verbose_help_message); + } +} + +/// Returns whether this is a Replicated table engine? +static bool isReplicated(const String & engine_name) +{ + return engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree"); +} + +/// Returns the part of the name of a table engine between "Replicated" (if any) and "MergeTree". +static std::string_view getNamePart(const String & engine_name) +{ + std::string_view name_part = engine_name; + if (name_part.starts_with("Replicated")) + name_part.remove_prefix(strlen("Replicated")); + + if (name_part.ends_with("MergeTree")) + name_part.remove_suffix(strlen("MergeTree")); + + return name_part; +} + +/// Extracts zookeeper path and replica name from the table engine's arguments. +/// The function can modify those arguments (that's why they're passed separately in `engine_args`) and also determines RenamingRestrictions. +/// The function assumes the table engine is Replicated. +static void extractZooKeeperPathAndReplicaNameFromEngineArgs( + const ASTCreateQuery & query, + const StorageID & table_id, + const String & engine_name, + ASTs & engine_args, + LoadingStrictnessLevel mode, + const ContextPtr & context, + String & zookeeper_path, + String & replica_name, + RenamingRestrictions & renaming_restrictions) +{ + chassert(isReplicated(engine_name)); + + zookeeper_path = ""; + replica_name = ""; + renaming_restrictions = RenamingRestrictions::ALLOW_ANY; + + bool is_extended_storage_def = isExtendedStorageDef(query); + + if (is_extended_storage_def) + { + /// Allow expressions in engine arguments. + /// In new syntax argument can be literal or identifier or array/tuple of identifiers. + evaluateEngineArgs(engine_args, context); + } + + bool is_on_cluster = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; + bool is_replicated_database = context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && + DatabaseCatalog::instance().getDatabase(table_id.database_name)->getEngineName() == "Replicated"; + + /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries + /// and if UUID was explicitly passed in CREATE TABLE (like for ATTACH) + bool allow_uuid_macro = is_on_cluster || is_replicated_database || query.attach || query.has_uuid; + + auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) + { + /// Unfold {database} and {table} macro on table creation, so table can be renamed. + if (mode < LoadingStrictnessLevel::ATTACH) + { + Macros::MacroExpansionInfo info; + /// NOTE: it's not recursive + info.expand_special_macros_only = true; + info.table_id = table_id; + /// Avoid unfolding {uuid} macro on this step. + /// We did unfold it in previous versions to make moving table from Atomic to Ordinary database work correctly, + /// but now it's not allowed (and it was the only reason to unfold {uuid} macro). 
+ info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + + info.level = 0; + replica_name = context->getMacros()->expand(replica_name, info); + } + + ast_zk_path->value = zookeeper_path; + ast_replica_name->value = replica_name; + + /// Expand other macros (such as {shard} and {replica}). We do not expand them on previous step + /// to make possible copying metadata files between replicas. + Macros::MacroExpansionInfo info; + info.table_id = table_id; + if (is_replicated_database) + { + auto database = DatabaseCatalog::instance().getDatabase(table_id.database_name); + info.shard = getReplicatedDatabaseShardName(database); + info.replica = getReplicatedDatabaseReplicaName(database); + } + if (!allow_uuid_macro) + info.table_id.uuid = UUIDHelpers::Nil; + zookeeper_path = context->getMacros()->expand(zookeeper_path, info); + + info.level = 0; + info.table_id.uuid = UUIDHelpers::Nil; + replica_name = context->getMacros()->expand(replica_name, info); + + /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. + /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation + /// or if one of these macros is recursively expanded from some other macro. + /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro + if (info.expanded_database || info.expanded_table) + renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; + else if (info.expanded_uuid) + renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; + }; + + size_t arg_num = 0; + size_t arg_cnt = engine_args.size(); + + bool has_arguments = (arg_num + 2 <= arg_cnt); + bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); + + if (has_valid_arguments) + { + /// Get path and name from engine arguments + auto * ast_zk_path = engine_args[arg_num]->as(); + if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) + zookeeper_path = ast_zk_path->value.safeGet(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", verbose_help_message); + + auto * ast_replica_name = engine_args[arg_num + 1]->as(); + if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) + replica_name = ast_replica_name->value.safeGet(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); + + if (replica_name.empty()) + throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); + + expand_macro(ast_zk_path, ast_replica_name); + } + else if (is_extended_storage_def + && (arg_cnt == 0 + || !engine_args[arg_num]->as() + || (arg_cnt == 1 && (getNamePart(engine_name) == "Graphite")))) + { + /// Try use default values if arguments are not specified. + /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. + const auto & server_settings = context->getServerSettings(); + zookeeper_path = server_settings.default_replica_path; + /// TODO maybe use hostname if {replica} is not defined? 
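+        /// By default these are '/clickhouse/tables/{uuid}/{shard}' and '{replica}'
+        /// (see default_replica_path / default_replica_name in the server settings).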
+ replica_name = server_settings.default_replica_name; + + /// Modify query, so default values will be written to metadata + assert(arg_num == 0); + ASTs old_args; + std::swap(engine_args, old_args); + auto path_arg = std::make_shared(zookeeper_path); + auto name_arg = std::make_shared(replica_name); + auto * ast_zk_path = path_arg.get(); + auto * ast_replica_name = name_arg.get(); + + expand_macro(ast_zk_path, ast_replica_name); + + engine_args.emplace_back(std::move(path_arg)); + engine_args.emplace_back(std::move(name_arg)); + std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); +} + +/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. +std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & query, const ContextPtr & context) +{ + try + { + if (!query.storage || !query.storage->engine) + return {}; + + const String & engine_name = query.storage->engine->name; + if (!isReplicated(engine_name)) + return {}; + + StorageID table_id{query.getDatabase(), query.getTable(), query.uuid}; + ASTs engine_args; + if (query.storage->engine->arguments) + engine_args = query.storage->engine->arguments->children; + for (auto & engine_arg : engine_args) + engine_arg = engine_arg->clone(); + LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE; + String zookeeper_path; + String replica_name; + RenamingRestrictions renaming_restrictions; + + extractZooKeeperPathAndReplicaNameFromEngineArgs(query, table_id, engine_name, engine_args, mode, context, + zookeeper_path, replica_name, renaming_restrictions); + + return zookeeper_path; + } + catch (...) + { + return {}; + } +} static StoragePtr create(const StorageFactory::Arguments & args) { @@ -156,17 +399,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) * - Additional MergeTreeSettings in the SETTINGS clause; */ - bool is_extended_storage_def = args.storage_def->isExtendedStorageDefinition() - || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty()) - || (args.query.columns_list->projections && !args.query.columns_list->projections->children.empty()); + bool is_extended_storage_def = isExtendedStorageDef(args.query); const Settings & local_settings = args.getLocalContext()->getSettingsRef(); - String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree")); - - bool replicated = startsWith(name_part, "Replicated"); - if (replicated) - name_part = name_part.substr(strlen("Replicated")); + bool replicated = isReplicated(args.engine_name); + std::string_view name_part = getNamePart(args.engine_name); MergeTreeData::MergingParams merging_params; merging_params.mode = MergeTreeData::MergingParams::Ordinary; @@ -283,29 +521,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) { /// Allow expressions in engine arguments. /// In new syntax argument can be literal or identifier or array/tuple of identifiers. - size_t arg_idx = 0; - try - { - for (; arg_idx < engine_args.size(); ++arg_idx) - { - auto & arg = engine_args[arg_idx]; - auto * arg_func = arg->as(); - if (!arg_func) - continue; - - /// If we got ASTFunction, let's evaluate it and replace with ASTLiteral. - /// Do not try evaluate array or tuple, because it's array or tuple of column identifiers. 
- if (arg_func->name == "array" || arg_func->name == "tuple") - continue; - Field value = evaluateConstantExpression(arg, args.getLocalContext()).first; - arg = std::make_shared(value); - } - } - catch (Exception & e) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot evaluate engine argument {}: {} {}", - arg_idx, e.message(), verbose_help_message); - } + evaluateEngineArgs(engine_args, args.getLocalContext()); } else if (args.mode <= LoadingStrictnessLevel::CREATE && !local_settings.allow_deprecated_syntax_for_merge_tree) { @@ -314,130 +530,17 @@ static StoragePtr create(const StorageFactory::Arguments & args) "See also `allow_deprecated_syntax_for_merge_tree` setting."); } - /// For Replicated. + /// Extract zookeeper path and replica name from engine arguments. String zookeeper_path; String replica_name; RenamingRestrictions renaming_restrictions = RenamingRestrictions::ALLOW_ANY; - bool is_on_cluster = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; - bool is_replicated_database = args.getLocalContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY && - DatabaseCatalog::instance().getDatabase(args.table_id.database_name)->getEngineName() == "Replicated"; - - /// Allow implicit {uuid} macros only for zookeeper_path in ON CLUSTER queries - /// and if UUID was explicitly passed in CREATE TABLE (like for ATTACH) - bool allow_uuid_macro = is_on_cluster || is_replicated_database || args.query.attach || args.query.has_uuid; - - auto expand_macro = [&] (ASTLiteral * ast_zk_path, ASTLiteral * ast_replica_name) - { - /// Unfold {database} and {table} macro on table creation, so table can be renamed. - if (args.mode < LoadingStrictnessLevel::ATTACH) - { - Macros::MacroExpansionInfo info; - /// NOTE: it's not recursive - info.expand_special_macros_only = true; - info.table_id = args.table_id; - /// Avoid unfolding {uuid} macro on this step. - /// We did unfold it in previous versions to make moving table from Atomic to Ordinary database work correctly, - /// but now it's not allowed (and it was the only reason to unfold {uuid} macro). - info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); - - info.level = 0; - replica_name = context->getMacros()->expand(replica_name, info); - } - - ast_zk_path->value = zookeeper_path; - ast_replica_name->value = replica_name; - - /// Expand other macros (such as {shard} and {replica}). We do not expand them on previous step - /// to make possible copying metadata files between replicas. - Macros::MacroExpansionInfo info; - info.table_id = args.table_id; - if (is_replicated_database) - { - auto database = DatabaseCatalog::instance().getDatabase(args.table_id.database_name); - info.shard = getReplicatedDatabaseShardName(database); - info.replica = getReplicatedDatabaseReplicaName(database); - } - if (!allow_uuid_macro) - info.table_id.uuid = UUIDHelpers::Nil; - zookeeper_path = context->getMacros()->expand(zookeeper_path, info); - - info.level = 0; - info.table_id.uuid = UUIDHelpers::Nil; - replica_name = context->getMacros()->expand(replica_name, info); - - /// We do not allow renaming table with these macros in metadata, because zookeeper_path will be broken after RENAME TABLE. - /// NOTE: it may happen if table was created by older version of ClickHouse (< 20.10) and macros was not unfolded on table creation - /// or if one of these macros is recursively expanded from some other macro. 
- /// Also do not allow to move table from Atomic to Ordinary database if there's {uuid} macro - if (info.expanded_database || info.expanded_table) - renaming_restrictions = RenamingRestrictions::DO_NOT_ALLOW; - else if (info.expanded_uuid) - renaming_restrictions = RenamingRestrictions::ALLOW_PRESERVING_UUID; - }; - if (replicated) { - bool has_arguments = arg_num + 2 <= arg_cnt; - bool has_valid_arguments = has_arguments && engine_args[arg_num]->as() && engine_args[arg_num + 1]->as(); - - ASTLiteral * ast_zk_path; - ASTLiteral * ast_replica_name; - - if (has_valid_arguments) - { - /// Get path and name from engine arguments - ast_zk_path = engine_args[arg_num]->as(); - if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) - zookeeper_path = ast_zk_path->value.safeGet(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", verbose_help_message); - ++arg_num; - - ast_replica_name = engine_args[arg_num]->as(); - if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) - replica_name = ast_replica_name->value.safeGet(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); - - if (replica_name.empty()) - throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); - ++arg_num; - - expand_macro(ast_zk_path, ast_replica_name); - } - else if (is_extended_storage_def - && (arg_cnt == 0 - || !engine_args[arg_num]->as() - || (arg_cnt == 1 && merging_params.mode == MergeTreeData::MergingParams::Graphite))) - { - /// Try use default values if arguments are not specified. - /// Note: {uuid} macro works for ON CLUSTER queries when database engine is Atomic. - const auto & server_settings = args.getContext()->getServerSettings(); - zookeeper_path = server_settings.default_replica_path; - /// TODO maybe use hostname if {replica} is not defined? - replica_name = server_settings.default_replica_name; - - /// Modify query, so default values will be written to metadata - assert(arg_num == 0); - ASTs old_args; - std::swap(engine_args, old_args); - auto path_arg = std::make_shared(zookeeper_path); - auto name_arg = std::make_shared(replica_name); - ast_zk_path = path_arg.get(); - ast_replica_name = name_arg.get(); - - expand_macro(ast_zk_path, ast_replica_name); - - engine_args.emplace_back(std::move(path_arg)); - engine_args.emplace_back(std::move(name_arg)); - std::move(std::begin(old_args), std::end(old_args), std::back_inserter(engine_args)); - arg_num = 2; - arg_cnt += 2; - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); + extractZooKeeperPathAndReplicaNameFromEngineArgs(args.query, args.table_id, args.engine_name, args.engine_args, args.mode, + args.getLocalContext(), zookeeper_path, replica_name, renaming_restrictions); + arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. + arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. 
} /// This merging param maybe used as part of sorting key diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 58d1846915f..0639b172d31 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include From faae8a4f2b683eed530b74f92ab58d1a76b5d001 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 24 Apr 2024 20:37:06 +0200 Subject: [PATCH 18/54] Add tests for backup on cluster with 2 shards and 2 replicas. --- .../configs/cluster_2x2.xml | 26 +++ .../test_backup_restore_on_cluster/test.py | 1 - .../test_two_shards_two_replicas.py | 153 ++++++++++++++++++ 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml create mode 100644 tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py diff --git a/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml new file mode 100644 index 00000000000..97e60fbbed7 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster_2x2.xml @@ -0,0 +1,26 @@ + + + + + + node_1_1 + 9000 + + + node_1_2 + 9000 + + + + + node_2_1 + 9000 + + + node_2_2 + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index d1520444df1..700ed6f15f5 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -41,7 +41,6 @@ node2 = cluster.add_instance( stay_alive=True, # Necessary for the "test_stop_other_host_while_backup" test ) - node3 = cluster.add_instance( "node3", main_configs=main_configs, diff --git a/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py b/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py new file mode 100644 index 00000000000..c0e318c8bb7 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/test_two_shards_two_replicas.py @@ -0,0 +1,153 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/cluster_2x2.xml", + "configs/lesser_timeouts.xml", # Default timeouts are quite big (a few minutes), the tests don't need them to be that big. 
+] + +user_configs = [ + "configs/zookeeper_retries.xml", +] + +node_1_1 = cluster.add_instance( + "node_1_1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "1", "shard": "1"}, + with_zookeeper=True, +) + +node_1_2 = cluster.add_instance( + "node_1_2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "2", "shard": "1"}, + with_zookeeper=True, +) + +node_2_1 = cluster.add_instance( + "node_2_1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "1", "shard": "2"}, + with_zookeeper=True, +) + +node_2_2 = cluster.add_instance( + "node_2_2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "2", "shard": "2"}, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node_1_1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query("DROP TABLE IF EXISTS table_a ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query("DROP TABLE IF EXISTS table_b ON CLUSTER 'cluster_2x2' SYNC") + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}')" + + +def test_replicated_table(): + node_1_1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query("INSERT INTO tbl VALUES (100), (200)") + node_2_1.query("INSERT INTO tbl VALUES (300), (400)") + + backup_name = new_backup_name() + + node_1_1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster_2x2' TO {backup_name}") + + node_1_1.query(f"DROP TABLE tbl ON CLUSTER 'cluster_2x2' SYNC") + + node_1_1.query(f"RESTORE ALL ON CLUSTER 'cluster_2x2' FROM {backup_name}") + + node_1_1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster_2x2' tbl") + + assert node_1_1.query("SELECT * FROM tbl ORDER BY x") == TSV([[100], [200]]) + assert node_1_2.query("SELECT * FROM tbl ORDER BY x") == TSV([[100], [200]]) + assert node_2_1.query("SELECT * FROM tbl ORDER BY x") == TSV([[300], [400]]) + assert node_2_2.query("SELECT * FROM tbl ORDER BY x") == TSV([[300], [400]]) + + +def test_two_tables_with_uuid_in_zk_path(): + node_1_1.query( + "CREATE TABLE table_a ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query( + "CREATE TABLE table_b ON CLUSTER 'cluster_2x2' (" + "x Int64" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')" + "ORDER BY x" + ) + + node_1_1.query("INSERT INTO table_a VALUES (100), (200)") + node_2_1.query("INSERT INTO table_a VALUES (300), (400)") + + node_1_2.query("INSERT INTO table_b VALUES (500), (600)") + node_2_2.query("INSERT INTO table_b VALUES (700), (800)") + + backup_name = new_backup_name() + + node_1_1.query( + f"BACKUP TABLE table_a, TABLE table_b ON CLUSTER 'cluster_2x2' TO {backup_name}" + ) + + node_1_1.query(f"DROP TABLE table_a ON CLUSTER 'cluster_2x2' SYNC") + node_1_1.query(f"DROP TABLE table_b ON CLUSTER 'cluster_2x2' SYNC") + + node_1_1.query(f"RESTORE ALL ON CLUSTER 'cluster_2x2' FROM {backup_name}") + + node_1_1.query("SYSTEM SYNC REPLICA 
ON CLUSTER 'cluster_2x2' table_a") + node_1_1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster_2x2' table_b") + + assert node_1_1.query("SELECT * FROM table_a ORDER BY x") == TSV([[100], [200]]) + assert node_1_2.query("SELECT * FROM table_a ORDER BY x") == TSV([[100], [200]]) + assert node_2_1.query("SELECT * FROM table_a ORDER BY x") == TSV([[300], [400]]) + assert node_2_2.query("SELECT * FROM table_a ORDER BY x") == TSV([[300], [400]]) + + assert node_1_1.query("SELECT * FROM table_b ORDER BY x") == TSV([[500], [600]]) + assert node_1_2.query("SELECT * FROM table_b ORDER BY x") == TSV([[500], [600]]) + assert node_2_1.query("SELECT * FROM table_b ORDER BY x") == TSV([[700], [800]]) + assert node_2_2.query("SELECT * FROM table_b ORDER BY x") == TSV([[700], [800]]) From 424e3f12e209608bfba00b3c5056f004f4c33192 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 1 May 2024 00:52:15 +0000 Subject: [PATCH 19/54] Revert "Merge pull request #62577 from ClickHouse/revert-61551-better-marks-loading" This reverts commit 17315e53b89ef7a8c1bea118b0fb75ec5ab51f9f, reversing changes made to ed35fddddd28d4109fa1934db16e93c41b107343. --- src/Storages/MergeTree/MarkRange.cpp | 5 ++ src/Storages/MergeTree/MarkRange.h | 1 + .../MergeTree/MergeTreeIndexReader.cpp | 3 + .../MergeTree/MergeTreeMarksLoader.cpp | 8 +++ src/Storages/MergeTree/MergeTreeMarksLoader.h | 1 + .../MergeTree/MergeTreeReaderCompact.cpp | 1 + .../MergeTree/MergeTreeReaderStream.cpp | 72 ++++++++++++++----- .../MergeTree/MergeTreeReaderStream.h | 38 +++++++--- .../MergeTree/MergeTreeReaderWide.cpp | 32 ++++++--- src/Storages/MergeTree/MergeTreeReaderWide.h | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 9 +-- .../test_merge_tree_load_marks/__init__.py | 0 .../configs/config.xml | 12 ++++ .../test_merge_tree_load_marks/test.py | 62 ++++++++++++++++ .../02532_send_logs_level_test.reference | 1 + .../0_stateless/02532_send_logs_level_test.sh | 2 +- 16 files changed, 207 insertions(+), 41 deletions(-) create mode 100644 tests/integration/test_merge_tree_load_marks/__init__.py create mode 100644 tests/integration/test_merge_tree_load_marks/configs/config.xml create mode 100644 tests/integration/test_merge_tree_load_marks/test.py diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index bd8546f04cc..c6e98b4e5a1 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -81,6 +81,11 @@ size_t MarkRanges::getNumberOfMarks() const return result; } +bool MarkRanges::isOneRangeForWholePart(size_t num_marks_in_part) const +{ + return size() == 1 && front().begin == 0 && front().end == num_marks_in_part; +} + void MarkRanges::serialize(WriteBuffer & out) const { writeBinaryLittleEndian(this->size(), out); diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index 1d9d0a1e27e..f36d5d89825 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -36,6 +36,7 @@ struct MarkRanges : public std::deque using std::deque::deque; /// NOLINT(modernize-type-traits) size_t getNumberOfMarks() const; + bool isOneRangeForWholePart(size_t num_marks_in_part) const; void serialize(WriteBuffer & out) const; String describe() const; diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 6012994b46d..e7ae1fc5c13 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -31,6 +31,8 @@ std::unique_ptr 
makeIndexReader( load_marks_threadpool, /*num_columns_in_mark=*/ 1); + marks_loader->startAsyncLoad(); + return std::make_unique( part->getDataPartStoragePtr(), index->getFileName(), extension, marks_count, @@ -65,6 +67,7 @@ MergeTreeIndexReader::MergeTreeIndexReader( mark_cache, uncompressed_cache, std::move(settings)); + version = index_format.version; stream->adjustRightMark(getLastMark(all_mark_ranges_)); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index eae7594448a..168134a329f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -64,6 +64,10 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( , read_settings(read_settings_) , num_columns_in_mark(num_columns_in_mark_) , load_marks_threadpool(load_marks_threadpool_) +{ +} + +void MergeTreeMarksLoader::startAsyncLoad() { if (load_marks_threadpool) future = loadMarksAsync(); @@ -102,6 +106,8 @@ MergeTreeMarksGetterPtr MergeTreeMarksLoader::loadMarks() MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() { + LOG_TEST(getLogger("MergeTreeMarksLoader"), "Loading marks from path {}", mrk_path); + /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -218,7 +224,9 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() } } else + { loaded_marks = loadMarksImpl(); + } if (!loaded_marks) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 73dd462f2fa..2aa4474e1c5 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -50,6 +50,7 @@ public: ~MergeTreeMarksLoader(); + void startAsyncLoad(); MergeTreeMarksGetterPtr loadMarks(); size_t getNumColumns() const { return num_columns_in_mark; } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 53acfd539fb..643a1c31474 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -48,6 +48,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( , profile_callback(profile_callback_) , clock_type(clock_type_) { + marks_loader->startAsyncLoad(); } void MergeTreeReaderCompact::fillColumnPositions() diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 40a16176c69..15ef02440cb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int CANNOT_READ_ALL_DATA; + extern const int LOGICAL_ERROR; } MergeTreeReaderStream::MergeTreeReaderStream( @@ -41,14 +42,17 @@ MergeTreeReaderStream::MergeTreeReaderStream( { } +void MergeTreeReaderStream::loadMarks() +{ + if (!marks_getter) + marks_getter = marks_loader->loadMarks(); +} + void MergeTreeReaderStream::init() { if (initialized) return; - initialized = true; - marks_getter = marks_loader->loadMarks(); - /// Compute the size of the buffer. 
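    /// estimateMarkRangeBytes() returns the largest single mark range in bytes (used as the read buffer size)
    /// and the total number of bytes to read over all ranges.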
auto [max_mark_range_bytes, sum_mark_range_bytes] = estimateMarkRangeBytes(all_mark_ranges); @@ -110,11 +114,15 @@ void MergeTreeReaderStream::init() data_buffer = non_cached_buffer.get(); compressed_data_buffer = non_cached_buffer.get(); } + + initialized = true; } void MergeTreeReaderStream::seekToMarkAndColumn(size_t row_index, size_t column_position) { init(); + loadMarks(); + const auto & mark = marks_getter->getMark(row_index, column_position); try @@ -193,7 +201,7 @@ CompressedReadBufferBase * MergeTreeReaderStream::getCompressedDataBuffer() return compressed_data_buffer; } -size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) const +size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. @@ -202,7 +210,8 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons if (marks_count == 0) return 0; - assert(right_mark <= marks_count); + chassert(right_mark <= marks_count); + loadMarks(); if (right_mark == 0) return marks_getter->getMark(right_mark, 0).offset_in_compressed_file; @@ -281,9 +290,9 @@ size_t MergeTreeReaderStreamSingleColumn::getRightOffset(size_t right_mark) cons return file_size; } -std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { - assert(marks_getter != nullptr); + loadMarks(); size_t max_range_bytes = 0; size_t sum_range_bytes = 0; @@ -302,7 +311,34 @@ std::pair MergeTreeReaderStreamSingleColumn::estimateMarkRangeBy return {max_range_bytes, sum_range_bytes}; } -size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const +size_t MergeTreeReaderStreamSingleColumnWholePart::getRightOffset(size_t right_mark) +{ + if (right_mark != marks_count) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one right mark: {}, got: {}", + marks_count, right_mark); + } + return file_size; +} + +std::pair MergeTreeReaderStreamSingleColumnWholePart::estimateMarkRangeBytes(const MarkRanges & mark_ranges) +{ + if (!mark_ranges.isOneRangeForWholePart(marks_count)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected one mark range that covers the whole part, got: {}", + mark_ranges.describe()); + } + return {file_size, file_size}; +} + +void MergeTreeReaderStreamSingleColumnWholePart::seekToMark(size_t) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeReaderStreamSingleColumnWholePart cannot seek to marks"); +} + +size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) { /// NOTE: if we are reading the whole file, then right_mark == marks_count /// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks. 
@@ -311,7 +347,8 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ if (marks_count == 0) return 0; - assert(right_mark_non_included <= marks_count); + chassert(right_mark_non_included <= marks_count); + loadMarks(); if (right_mark_non_included == 0) return marks_getter->getMark(right_mark_non_included, column_position).offset_in_compressed_file; @@ -347,9 +384,9 @@ size_t MergeTreeReaderStreamMultipleColumns::getRightOffsetOneColumn(size_t righ } std::pair -MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const +MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) { - assert(marks_getter != nullptr); + loadMarks(); /// As a maximal range we return the maximal size of a whole stripe. size_t max_range_bytes = 0; @@ -386,8 +423,9 @@ MergeTreeReaderStreamMultipleColumns::estimateMarkRangeBytesOneColumn(const Mark return {max_range_bytes, sum_range_bytes}; } -MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) const +MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeMark(size_t row_index, size_t column_position) { + loadMarks(); const auto & current_mark = marks_getter->getMark(row_index, column_position); if (marks_getter->getNumColumns() == 1) @@ -434,27 +472,27 @@ MarkInCompressedFile MergeTreeReaderStreamMultipleColumns::getStartOfNextStripeM return marks_getter->getMark(mark_index + 1, column_position + 1); } -size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamOneOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, column_position); } -std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamOneOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { return estimateMarkRangeBytesOneColumn(mark_ranges, column_position); } -size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) const +size_t MergeTreeReaderStreamAllOfMultipleColumns::getRightOffset(size_t right_mark_non_included) { return getRightOffsetOneColumn(right_mark_non_included, marks_loader->getNumColumns() - 1); } -std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) const +std::pair MergeTreeReaderStreamAllOfMultipleColumns::estimateMarkRangeBytes(const MarkRanges & mark_ranges) { size_t max_range_bytes = 0; size_t sum_range_bytes = 0; - for (size_t i = 0; i < marks_getter->getNumColumns(); ++i) + for (size_t i = 0; i < marks_loader->getNumColumns(); ++i) { auto [current_max, current_sum] = estimateMarkRangeBytesOneColumn(mark_ranges, i); diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index f3ca6953ceb..05341cd8acc 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -40,6 +40,7 @@ public: /// Seeks to exact mark in file. void seekToMarkAndColumn(size_t row_index, size_t column_position); + /// Seeks to the start of the file. 
void seekToStart(); /** @@ -53,11 +54,11 @@ public: private: /// Returns offset in file up to which it's needed to read file to read all rows up to @right_mark mark. - virtual size_t getRightOffset(size_t right_mark) const = 0; + virtual size_t getRightOffset(size_t right_mark) = 0; /// Returns estimated max amount of bytes to read among mark ranges (which is used as size for read buffer) /// and total amount of bytes to read in all mark ranges. - virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const = 0; + virtual std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) = 0; const ReadBufferFromFileBase::ProfileCallback profile_callback; const clockid_t clock_type; @@ -80,6 +81,7 @@ private: protected: void init(); + void loadMarks(); const MergeTreeReaderSettings settings; const size_t marks_count; @@ -100,11 +102,25 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; +class MergeTreeReaderStreamSingleColumnWholePart : public MergeTreeReaderStream +{ +public: + template + explicit MergeTreeReaderStreamSingleColumnWholePart(Args &&... args) + : MergeTreeReaderStream{std::forward(args)...} + { + } + + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; + void seekToMark(size_t row_index) override; +}; + /// Base class for reading from file that contains multiple columns. /// It is used to read from compact parts. /// See more details about data layout in MergeTreeDataPartCompact.h. 
@@ -118,9 +134,9 @@ public: } protected: - size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position) const; - std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position) const; - MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position) const; + size_t getRightOffsetOneColumn(size_t right_mark_non_included, size_t column_position); + std::pair estimateMarkRangeBytesOneColumn(const MarkRanges & mark_ranges, size_t column_position); + MarkInCompressedFile getStartOfNextStripeMark(size_t row_index, size_t column_position); }; /// Class for reading a single column from file that contains multiple columns @@ -135,8 +151,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, column_position); } private: @@ -154,8 +170,8 @@ public: { } - size_t getRightOffset(size_t right_mark_non_included) const override; - std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) const override; + size_t getRightOffset(size_t right_mark_non_included) override; + std::pair estimateMarkRangeBytes(const MarkRanges & mark_ranges) override; void seekToMark(size_t row_index) override { seekToMarkAndColumn(row_index, 0); } }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 394a22835f1..d398668d5c8 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,6 +43,7 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) + , read_whole_part(all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { @@ -227,12 +228,13 @@ void MergeTreeReaderWide::addStreams( auto context = data_part_info_for_read->getContext(); auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; + size_t num_marks_in_part = data_part_info_for_read->getMarksCount(); auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - data_part_info_for_read->getMarksCount(), + num_marks_in_part, data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, settings.read_settings, @@ -243,11 +245,24 @@ void MergeTreeReaderWide::addStreams( auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - streams.emplace(*stream_name, std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type)); + auto create_stream = [&]() + { + return std::make_unique( + data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + num_marks_in_part, all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type); + }; + + if (read_whole_part) + { + streams.emplace(*stream_name, create_stream.operator()()); + } + else + { + marks_loader->startAsyncLoad(); + streams.emplace(*stream_name, create_stream.operator()()); + } }; serialization->enumerateStreams(callback); @@ -325,7 +340,8 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading; + bool seek_to_mark = !continue_reading && !read_whole_part; + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); @@ -349,7 +365,7 @@ void MergeTreeReaderWide::readData( deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading; + bool seek_to_mark = !was_prefetched && !continue_reading && !read_whole_part; return getStream( /* seek_to_start = */false, substream_path, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..7ffe565d262 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -73,6 +73,7 @@ private: std::unordered_map caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + bool read_whole_part = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index c022cfe3861..47661a3ff93 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -184,12 +184,12 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( storage_snapshot, *mark_ranges, /*virtual_fields=*/ {}, - /*uncompressed_cache=*/{}, + /*uncompressed_cache=*/ {}, mark_cache.get(), alter_conversions, reader_settings, - {}, - {}); + /*avg_value_size_hints=*/ {}, + /*profile_callback=*/ {}); } static void fillBlockNumberColumns( @@ -230,6 +230,7 @@ 
try const auto & header = getPort().getHeader(); /// Part level is useful for next step for merging non-merge tree table bool add_part_level = storage.merging_params.mode != MergeTreeData::MergingParams::Ordinary; + size_t num_marks_in_part = data_part->getMarksCount(); if (!isCancelled() && current_row < data_part->rows_count) { @@ -238,7 +239,7 @@ try const auto & sample = reader->getColumns(); Columns columns(sample.size()); - size_t rows_read = reader->readRows(current_mark, data_part->getMarksCount(), continue_reading, rows_to_read, columns); + size_t rows_read = reader->readRows(current_mark, num_marks_in_part, continue_reading, rows_to_read, columns); if (rows_read) { diff --git a/tests/integration/test_merge_tree_load_marks/__init__.py b/tests/integration/test_merge_tree_load_marks/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_merge_tree_load_marks/configs/config.xml b/tests/integration/test_merge_tree_load_marks/configs/config.xml new file mode 100644 index 00000000000..1c9ee8d698f --- /dev/null +++ b/tests/integration/test_merge_tree_load_marks/configs/config.xml @@ -0,0 +1,12 @@ + + + system + text_log
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+        <level>test</level>
+    </text_log>
+</clickhouse>
diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py new file mode 100644 index 00000000000..b066b2a6ec0 --- /dev/null +++ b/tests/integration/test_merge_tree_load_marks/test.py @@ -0,0 +1,62 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# This test is bad and it should be a functional test but S3 metrics +# are accounted incorrectly for merges in part_log and query_log. +# Also we have text_log with level 'trace' in functional tests +# but this test requeires text_log with level 'test'. + + +@pytest.mark.parametrize("min_bytes_for_wide_part", [0, 1000000000]) +def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): + node.query( + f""" + DROP TABLE IF EXISTS t_load_marks; + + CREATE TABLE t_load_marks (a UInt64, b UInt64) + ENGINE = MergeTree ORDER BY a + SETTINGS min_bytes_for_wide_part = {min_bytes_for_wide_part}; + + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + INSERT INTO t_load_marks SELECT number, number FROM numbers(1000); + + OPTIMIZE TABLE t_load_marks FINAL; + SYSTEM FLUSH LOGS; + """ + ) + + uuid = node.query( + "SELECT uuid FROM system.tables WHERE table = 't_prewarm_merge'" + ).strip() + + result = node.query( + f""" + SELECT count() + FROM system.text_log + WHERE (query_id LIKE '%{uuid}::all_1_2_1%') AND (message LIKE '%Loading marks%') + """ + ).strip() + + result = int(result) + + is_wide = min_bytes_for_wide_part == 0 + not_loaded = result == 0 + + assert is_wide == not_loaded diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.reference b/tests/queries/0_stateless/02532_send_logs_level_test.reference index dbd49cfc0a4..7e51b888d9c 100644 --- a/tests/queries/0_stateless/02532_send_logs_level_test.reference +++ b/tests/queries/0_stateless/02532_send_logs_level_test.reference @@ -1,2 +1,3 @@ + MergeTreeMarksLoader: Loading marks from path data.cmrk3 MergeTreeRangeReader: First reader returned: num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), requested columns: key MergeTreeRangeReader: read() returned num_rows: 1, columns: 1, total_rows_per_granule: 1, no filter, column[0]: Int32(size = 1), sample block key diff --git a/tests/queries/0_stateless/02532_send_logs_level_test.sh b/tests/queries/0_stateless/02532_send_logs_level_test.sh index f65d8705569..4afc6d4496b 100755 --- a/tests/queries/0_stateless/02532_send_logs_level_test.sh +++ b/tests/queries/0_stateless/02532_send_logs_level_test.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " drop table if exists data; - create table data (key Int) engine=MergeTree order by tuple(); + create table data (key Int) engine=MergeTree order by tuple() settings min_bytes_for_wide_part = '1G', compress_marks = 1; insert into data values (1); " From 6f484734c2b7c8f1c2aaa365461772d560dd0c60 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 1 May 2024 11:34:59 +0000 Subject: [PATCH 20/54] fix reading in general case --- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 ++ src/Storages/MergeTree/MergeTreeReaderWide.cpp | 10 ++++++---- src/Storages/MergeTree/MergeTreeReaderWide.h | 2 +- src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 1 + 4 files 
changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index feeb1808a6f..12a83703148 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -44,6 +44,8 @@ struct MergeTreeReaderSettings bool enable_multiple_prewhere_read_steps = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. bool adjust_read_buffer_size = true; + /// If true, it's allowed to read the whole part without reading marks. + bool can_read_part_without_marks = false; }; struct MergeTreeWriterSettings diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index d398668d5c8..59feb4dda19 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -43,7 +43,9 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) - , read_whole_part(all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) + , read_without_marks( + settings.can_read_part_without_marks + && all_mark_ranges.isOneRangeForWholePart(data_part_info_for_read->getMarksCount())) { try { @@ -254,7 +256,7 @@ void MergeTreeReaderWide::addStreams( std::move(marks_loader), profile_callback, clock_type); }; - if (read_whole_part) + if (read_without_marks) { streams.emplace(*stream_name, create_stream.operator()()); } @@ -340,7 +342,7 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { - bool seek_to_mark = !continue_reading && !read_whole_part; + bool seek_to_mark = !continue_reading && !read_without_marks; if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { @@ -365,7 +367,7 @@ void MergeTreeReaderWide::readData( deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - bool seek_to_mark = !was_prefetched && !continue_reading && !read_whole_part; + bool seek_to_mark = !was_prefetched && !continue_reading && !read_without_marks; return getStream( /* seek_to_start = */false, substream_path, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 7ffe565d262..9f6bdd79b00 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -73,7 +73,7 @@ private: std::unordered_map caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; - bool read_whole_part = false; + bool read_without_marks = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 47661a3ff93..0939378932b 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -174,6 +174,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( .read_settings = read_settings, .save_marks_in_cache = false, .apply_deleted_mask = apply_deleted_mask, + .can_read_part_without_marks = true, }; if (!mark_ranges) From a50c41c61703126fcef38615b1612bdff9a6a336 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 30 Apr 2024 13:19:10 +0300 Subject: [PATCH 21/54] QueryAnalysisPass improve QUALIFY validation --- src/Analyzer/ValidationUtils.cpp | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 60cc1dd521f..e17639367eb 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -266,6 +266,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidatio if (query_node_typed.hasHaving()) validate_group_by_columns_visitor.visit(query_node_typed.getHaving()); + if (query_node_typed.hasQualify()) + validate_group_by_columns_visitor.visit(query_node_typed.getQualify()); + if (query_node_typed.hasOrderBy()) validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode()); From 5f68eb847c7b88a118fb40599a2f51f9782a97fd Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 May 2024 18:40:31 +0300 Subject: [PATCH 22/54] Added tests --- ...dow_functions_qualify_validation.reference | 0 ...43_window_functions_qualify_validation.sql | 26 +++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 tests/queries/0_stateless/03143_window_functions_qualify_validation.reference create mode 100644 tests/queries/0_stateless/03143_window_functions_qualify_validation.sql diff --git a/tests/queries/0_stateless/03143_window_functions_qualify_validation.reference b/tests/queries/0_stateless/03143_window_functions_qualify_validation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql b/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql new file mode 100644 index 00000000000..2b6d1820b00 --- /dev/null +++ b/tests/queries/0_stateless/03143_window_functions_qualify_validation.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS uk_price_paid; +CREATE TABLE uk_price_paid +( + `price` UInt32, + `date` Date, + `postcode1` LowCardinality(String), + `postcode2` LowCardinality(String), + `type` Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0), + `is_new` UInt8, + `duration` Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0), + `addr1` String, + `addr2` String, + `street` LowCardinality(String), + `locality` LowCardinality(String), + `town` LowCardinality(String), + `district` LowCardinality(String), + `county` LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2); + +SELECT count(), (quantile(0.9)(price) OVER ()) AS price_quantile FROM uk_price_paid WHERE toYear(date) = 2023 QUALIFY price > price_quantile; -- { serverError 215 } + +SELECT count() FROM uk_price_paid WHERE toYear(date) = 2023 QUALIFY price > (quantile(0.9)(price) OVER ()); -- { serverError 215 } + +DROP TABLE uk_price_paid; From ccfe3f8cbcbb180004ddceabc33b5339cec9785a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 3 May 2024 13:59:02 +0000 Subject: [PATCH 23/54] fix test --- tests/integration/test_merge_tree_load_marks/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_load_marks/test.py b/tests/integration/test_merge_tree_load_marks/test.py index b066b2a6ec0..a7078017ac9 100644 --- a/tests/integration/test_merge_tree_load_marks/test.py +++ b/tests/integration/test_merge_tree_load_marks/test.py @@ -43,7 +43,7 @@ def test_merge_load_marks(started_cluster, min_bytes_for_wide_part): ) uuid = node.query( - "SELECT uuid FROM system.tables WHERE table = 't_prewarm_merge'" + "SELECT uuid FROM system.tables WHERE table = 't_load_marks'" ).strip() result = node.query( From d6690f8384235531947a09cd65f00623095f3ed4 Mon Sep 17 00:00:00 2001 From: 
unashi Date: Mon, 6 May 2024 15:12:16 +0800 Subject: [PATCH 24/54] [feature] Raw as a synonym for TSVRaw --- docs/en/interfaces/formats.md | 6 +++--- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 8 ++++++++ .../Formats/Impl/TabSeparatedRowOutputFormat.cpp | 3 +++ .../0_stateless/00397_tsv_format_synonym.reference | 3 +++ tests/queries/0_stateless/00397_tsv_format_synonym.sql | 1 + 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 03cf345349e..937dfb52609 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -206,7 +206,7 @@ SELECT * FROM nestedt FORMAT TSV Differs from `TabSeparated` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRaw`. +This format is also available under the name `TSVRaw`, `Raw`. ## TabSeparatedWithNames {#tabseparatedwithnames} @@ -241,14 +241,14 @@ This format is also available under the name `TSVWithNamesAndTypes`. Differs from `TabSeparatedWithNames` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNames`. +This format is also available under the name `TSVRawWithNames`, `RawWithNames`. ## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes} Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNamesAndNames`. +This format is also available under the name `TSVRawWithNamesAndNames`, `RawWithNamesAndNames`. ## Template {#format-template} diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 85b1797dab8..09f8fa92e5f 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -402,6 +402,8 @@ void registerInputFormatTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); } } @@ -433,6 +435,8 @@ void registerTSVSchemaReader(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); } } @@ -506,8 +510,12 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); + if (is_raw) + registerWithNamesAndTypes("Raw", register_func); markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TSVRaw" : "TSV", factory); markFormatWithNamesAndTypesSupportsSamplingColumns(is_raw ? "TabSeparatedRaw" : "TabSeparated", factory); + if (is_raw) + markFormatWithNamesAndTypesSupportsSamplingColumns("Raw", factory); } // We can use the same segmentation engine for TSKV. 
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp index a4a5aea26cb..c8384c09be6 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.cpp @@ -95,7 +95,10 @@ void registerOutputFormatTabSeparated(FormatFactory & factory) registerWithNamesAndTypes(is_raw ? "TSVRaw" : "TSV", register_func); registerWithNamesAndTypes(is_raw ? "TabSeparatedRaw" : "TabSeparated", register_func); if (is_raw) + { registerWithNamesAndTypes("LineAsString", register_func); + registerWithNamesAndTypes("Raw", register_func); + } } } diff --git a/tests/queries/0_stateless/00397_tsv_format_synonym.reference b/tests/queries/0_stateless/00397_tsv_format_synonym.reference index c4a86983be3..c91169a06fa 100644 --- a/tests/queries/0_stateless/00397_tsv_format_synonym.reference +++ b/tests/queries/0_stateless/00397_tsv_format_synonym.reference @@ -28,3 +28,6 @@ UInt8 String String 1 hello world 2 hello world 3 hello world +1 hello world +2 hello world +3 hello world diff --git a/tests/queries/0_stateless/00397_tsv_format_synonym.sql b/tests/queries/0_stateless/00397_tsv_format_synonym.sql index 8c69a795857..51283c6ced9 100644 --- a/tests/queries/0_stateless/00397_tsv_format_synonym.sql +++ b/tests/queries/0_stateless/00397_tsv_format_synonym.sql @@ -9,3 +9,4 @@ SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVWithN SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TabSeparatedRaw; SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVRaw; +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT Raw; From 731d05491cf44d8356f1d6971883004a862fcd0d Mon Sep 17 00:00:00 2001 From: Andrey Zvonov Date: Mon, 6 May 2024 12:08:02 +0000 Subject: [PATCH 25/54] simplify estimation of number of objects in bucket --- src/Storages/StorageS3.cpp | 33 ++++++++++++++------------------- src/Storages/StorageS3.h | 1 - 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index cb5734cfe0c..8a4e30fed1d 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -485,12 +485,15 @@ StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(siz size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() { - return pimpl->objectsCount(); -} - -bool StorageS3Source::DisclosedGlobIterator::hasMore() -{ - return pimpl->hasMore(); + if (pimpl->hasMore()) + { + /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); + /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do + /// as it would lead to serious (up to times) reading performance degradation. 
+ return std::numeric_limits::max(); + } + else + return pimpl->objectsCount(); } class StorageS3Source::KeysIterator::Impl @@ -1285,21 +1288,13 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, createIterator(nullptr); size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - const auto glob_iter = std::dynamic_pointer_cast(iterator_wrapper); - - if (!(glob_iter && glob_iter->hasMore())) + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else { - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - { - /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. - num_streams = 1; - } + /// The amount of keys (zero) was probably underestimated. We will keep one stream for this particular case. + num_streams = 1; } - /// OTHERWISE, 1000 files were listed, but we cannot make any estimation of _how many_ there are (because we list bucket lazily); - /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - /// as it would lead to serious (up to times) reading performance degradation. const size_t max_threads = context->getSettingsRef().max_threads; const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index b841e973a9b..c8ab28fb20e 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -83,7 +83,6 @@ public: KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT size_t estimatedKeysCount() override; - bool hasMore(); private: class Impl; From c53c8eb6d178329be8b7f11bfa09521545624384 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 01:36:20 +0200 Subject: [PATCH 26/54] Fix slow suggest --- src/Access/ContextAccess.cpp | 8 +---- src/Interpreters/Context.cpp | 6 ++-- src/Storages/System/StorageSystemColumns.cpp | 18 ++++++---- .../System/StorageSystemDatabases.cpp | 4 +-- src/Storages/System/StorageSystemTables.cpp | 6 ++-- ...147_system_columns_access_checks.reference | 2 ++ .../03147_system_columns_access_checks.sh | 36 +++++++++++++++++++ 7 files changed, 58 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/03147_system_columns_access_checks.reference create mode 100755 tests/queries/0_stateless/03147_system_columns_access_checks.sh diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 2736d13e751..2a658d7aaa2 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -570,11 +570,8 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (params.full_access) return true; - auto access_granted = [&] + auto access_granted = [] { - if (trace_log) - LOG_TRACE(trace_log, "Access granted: {}{}", (AccessRightsElement{flags, args...}.toStringWithoutOptions()), - (grant_option ? " WITH GRANT OPTION" : "")); return true; }; @@ -582,9 +579,6 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg FormatStringHelper fmt_string [[maybe_unused]], FmtArgs && ...fmt_args [[maybe_unused]]) { - if (trace_log) - LOG_TRACE(trace_log, "Access denied: {}{}", (AccessRightsElement{flags, args...}.toStringWithoutOptions()), - (grant_option ? 
" WITH GRANT OPTION" : "")); if constexpr (throw_if_denied) throw Exception(error_code, std::move(fmt_string), getUserName(), std::forward(fmt_args)...); return false; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 44d36e94441..d22cdccc722 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2498,7 +2498,7 @@ AsyncLoader & Context::getAsyncLoader() const shared->async_loader = std::make_unique(std::vector{ // IMPORTANT: Pool declaration order should match the order in `PoolId.h` to get the indices right. { // TablesLoaderForegroundPoolId - "FgLoad", + "ForegroundLoad", CurrentMetrics::TablesLoaderForegroundThreads, CurrentMetrics::TablesLoaderForegroundThreadsActive, CurrentMetrics::TablesLoaderForegroundThreadsScheduled, @@ -2506,7 +2506,7 @@ AsyncLoader & Context::getAsyncLoader() const TablesLoaderForegroundPriority }, { // TablesLoaderBackgroundLoadPoolId - "BgLoad", + "BackgroundLoad", CurrentMetrics::TablesLoaderBackgroundThreads, CurrentMetrics::TablesLoaderBackgroundThreadsActive, CurrentMetrics::TablesLoaderBackgroundThreadsScheduled, @@ -2514,7 +2514,7 @@ AsyncLoader & Context::getAsyncLoader() const TablesLoaderBackgroundLoadPriority }, { // TablesLoaderBackgroundStartupPoolId - "BgStartup", + "BackgroundStartup", CurrentMetrics::TablesLoaderBackgroundThreads, CurrentMetrics::TablesLoaderBackgroundThreadsActive, CurrentMetrics::TablesLoaderBackgroundThreadsScheduled, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8c6d29a3b70..15b73fd1884 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -88,6 +88,7 @@ public: , total_tables(tables->size()), access(context->getAccess()) , query_id(context->getCurrentQueryId()), lock_acquire_timeout(context->getSettingsRef().lock_acquire_timeout) { + need_to_check_access_for_tables = !access->isGranted(AccessType::SHOW_COLUMNS); } String getName() const override { return "Columns"; } @@ -101,8 +102,6 @@ protected: MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); size_t rows_count = 0; - const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_COLUMNS); - while (rows_count < max_block_size && db_table_num < total_tables) { const std::string database_name = (*databases)[db_table_num].get(); @@ -138,13 +137,17 @@ protected: column_sizes = storage->getColumnSizes(); } - bool check_access_for_columns = check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); + /// A shortcut: if we don't allow to list this table in SHOW TABLES, also exclude it from system.columns. 
+ if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + bool need_to_check_access_for_columns = need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); size_t position = 0; for (const auto & column : columns) { ++position; - if (check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name)) + if (need_to_check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name)) continue; size_t src_index = 0; @@ -296,6 +299,7 @@ private: size_t db_table_num = 0; size_t total_tables; std::shared_ptr access; + bool need_to_check_access_for_tables; String query_id; std::chrono::milliseconds lock_acquire_timeout; }; @@ -358,7 +362,6 @@ void StorageSystemColumns::read( auto [columns_mask, header] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto this_ptr = std::static_pointer_cast(shared_from_this()); auto reading = std::make_unique( @@ -416,9 +419,10 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, /// Add `table` column. MutableColumnPtr table_column_mut = ColumnString::create(); - IColumn::Offsets offsets(database_column->size()); + size_t num_databases = database_column->size(); + IColumn::Offsets offsets(num_databases); - for (size_t i = 0; i < database_column->size(); ++i) + for (size_t i = 0; i < num_databases; ++i) { const std::string database_name = (*database_column)[i].get(); if (database_name.empty()) diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 2351c3c6a2a..1dbb187c418 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -102,7 +102,7 @@ static ColumnPtr getFilteredDatabases(const Databases & databases, const Actions void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const { const auto access = context->getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_DATABASES); const auto databases = DatabaseCatalog::instance().getDatabases(); ColumnPtr filtered_databases_column = getFilteredDatabases(databases, predicate, context); @@ -111,7 +111,7 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c { auto database_name = filtered_databases_column->getDataAt(i).toString(); - if (check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) + if (need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_DATABASES, database_name)) continue; if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 9bd7ff945ad..d428d6bd6d0 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -224,7 +224,7 @@ protected: MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); const auto access = context->getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); size_t rows_count = 0; while 
(rows_count < max_block_size) @@ -348,7 +348,7 @@ protected: return Chunk(std::move(res_columns), num_rows); } - const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); if (!tables_it || !tables_it->isValid()) tables_it = database->getTablesIterator(context); @@ -361,7 +361,7 @@ protected: if (!tables.contains(table_name)) continue; - if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) continue; StoragePtr table = nullptr; diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.reference b/tests/queries/0_stateless/03147_system_columns_access_checks.reference new file mode 100644 index 00000000000..35438f11b31 --- /dev/null +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.reference @@ -0,0 +1,2 @@ +........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................ +end diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh new file mode 100755 index 00000000000..b204e7b28d8 --- /dev/null +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Create many tables in the database +NUM_TABLES=1000 +NUM_COLUMNS=1000 +THREADS=$(nproc) + +COLUMNS=$(seq 1 $NUM_COLUMNS | sed -r -e 's/(.+)/c\1 UInt8, /' | tr -d '\n') + +seq 1 $NUM_TABLES | xargs -P "${THREADS}" -I{} bash -c " + echo -n '.' + $CLICKHOUSE_CLIENT --query 'CREATE OR REPLACE TABLE test{} (${COLUMNS} end String) ENGINE = Memory' +" +echo + +$CLICKHOUSE_CLIENT --multiquery " +DROP USER IF EXISTS test_03147; +CREATE USER test_03147; +GRANT SELECT (end) ON ${CLICKHOUSE_DATABASE}.test1 TO test_03147; +" + +# This query was slow in previous ClickHouse versions for several reasons: +# - tables and databases without SHOW TABLES access were still checked for SHOW COLUMNS access for every column in every table; +# - excessive logging of "access granted" and "access denied" + +# The test could succeed even on the previous version, but it will show up as being too slow. 
+$CLICKHOUSE_CLIENT --user test_03147 --query "SELECT name FROM system.columns" + +$CLICKHOUSE_CLIENT --multiquery " +DROP USER test_03147; +" From a38ea6c8cd3aa706171fa15e053670a1c49c0d5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 02:54:25 +0200 Subject: [PATCH 27/54] Thread names --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d22cdccc722..df49b822aaa 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2514,7 +2514,7 @@ AsyncLoader & Context::getAsyncLoader() const TablesLoaderBackgroundLoadPriority }, { // TablesLoaderBackgroundStartupPoolId - "BackgroundStartup", + "BackgrndStartup", CurrentMetrics::TablesLoaderBackgroundThreads, CurrentMetrics::TablesLoaderBackgroundThreadsActive, CurrentMetrics::TablesLoaderBackgroundThreadsScheduled, From e33d8272b03fec5ff72b098c1177130d73a8d6f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 03:31:00 +0200 Subject: [PATCH 28/54] Fix style --- tests/queries/0_stateless/03147_system_columns_access_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index b204e7b28d8..70693e0985f 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -29,7 +29,7 @@ GRANT SELECT (end) ON ${CLICKHOUSE_DATABASE}.test1 TO test_03147; # - excessive logging of "access granted" and "access denied" # The test could succeed even on the previous version, but it will show up as being too slow. -$CLICKHOUSE_CLIENT --user test_03147 --query "SELECT name FROM system.columns" +$CLICKHOUSE_CLIENT --user test_03147 --query "SELECT name FROM system.columns WHERE database = currentDatabase()" $CLICKHOUSE_CLIENT --multiquery " DROP USER test_03147; From d37590aed68e12c5fd7664b1a21138dd428d1482 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 7 May 2024 10:36:44 +0800 Subject: [PATCH 29/54] [update] add test for RawWithNames, RawWithNamesAndTypes and *WithNames, *WithNamesAndTypes; add changelog --- CHANGELOG.md | 1 + .../00397_tsv_format_synonym.reference | 27 +++++++++++++++++++ .../0_stateless/00397_tsv_format_synonym.sql | 8 ++++++ 3 files changed, 36 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f40c42c4462..955e2f5b72f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ * A mode for `topK`/`topkWeighed` support mode, which return count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)). * Added function `toMillisecond` which returns the millisecond component for values of type`DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)). * Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. [#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)) #### Performance Improvement * Optimized function `dotProduct` to omit unnecessary and expensive memory copies. 
[#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/tests/queries/0_stateless/00397_tsv_format_synonym.reference b/tests/queries/0_stateless/00397_tsv_format_synonym.reference index c91169a06fa..3326b039b8d 100644 --- a/tests/queries/0_stateless/00397_tsv_format_synonym.reference +++ b/tests/queries/0_stateless/00397_tsv_format_synonym.reference @@ -31,3 +31,30 @@ UInt8 String String 1 hello world 2 hello world 3 hello world +arr s1 s2 +1 hello world +2 hello world +3 hello world +arr s1 s2 +1 hello world +2 hello world +3 hello world +arr s1 s2 +1 hello world +2 hello world +3 hello world +arr s1 s2 +UInt8 String String +1 hello world +2 hello world +3 hello world +arr s1 s2 +UInt8 String String +1 hello world +2 hello world +3 hello world +arr s1 s2 +UInt8 String String +1 hello world +2 hello world +3 hello world diff --git a/tests/queries/0_stateless/00397_tsv_format_synonym.sql b/tests/queries/0_stateless/00397_tsv_format_synonym.sql index 51283c6ced9..b3b231fbf3f 100644 --- a/tests/queries/0_stateless/00397_tsv_format_synonym.sql +++ b/tests/queries/0_stateless/00397_tsv_format_synonym.sql @@ -10,3 +10,11 @@ SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVWithN SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TabSeparatedRaw; SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVRaw; SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT Raw; + +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TabSeparatedRawWithNames; +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVRawWithNames; +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT RawWithNames; + +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TabSeparatedRawWithNamesAndTypes; +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT TSVRawWithNamesAndTypes; +SELECT arrayJoin([1, 2, 3]) AS arr, 'hello' AS s1, 'world' AS s2 FORMAT RawWithNamesAndTypes; From a8ae0074aa5563b8e65ae110fa5dc71313a81a77 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 7 May 2024 10:40:46 +0800 Subject: [PATCH 30/54] [fix] name->names --- docs/en/interfaces/formats.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 937dfb52609..0b108edc17f 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -206,7 +206,7 @@ SELECT * FROM nestedt FORMAT TSV Differs from `TabSeparated` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRaw`, `Raw`. +This format is also available under the names `TSVRaw`, `Raw`. ## TabSeparatedWithNames {#tabseparatedwithnames} @@ -241,14 +241,14 @@ This format is also available under the name `TSVWithNamesAndTypes`. Differs from `TabSeparatedWithNames` format in that the rows are written without escaping. When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNames`, `RawWithNames`. +This format is also available under the names `TSVRawWithNames`, `RawWithNames`. ## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes} Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping. 
When parsing with this format, tabs or linefeeds are not allowed in each field. -This format is also available under the name `TSVRawWithNamesAndNames`, `RawWithNamesAndNames`. +This format is also available under the names `TSVRawWithNamesAndNames`, `RawWithNamesAndNames`. ## Template {#format-template} From 0a1d852dfd52cc88502a7699d249328edb041976 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Thu, 2 May 2024 21:35:32 +0000 Subject: [PATCH 31/54] Enable plain_rewritable metadata for local and azure Enable plain_rewritable support for local and azure (azure_blob_storage) metadata type. - HDFS object storage currently does not support iteration and does not implement listObjects method. It's a blocker for enabling plain_rewritable metadata type with HDFS. - StaticWeb object storage is read-only and works with its dedicated metadata type. --- .../ObjectStorages/ObjectStorageFactory.cpp | 14 +++++-- .../PlainRewritableObjectStorage.h | 39 ++++++++++++++++++- .../ObjectStorages/S3/S3ObjectStorage.cpp | 7 ---- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 3 -- .../03008_local_plain_rewritable.reference | 22 +++++++++++ .../03008_local_plain_rewritable.sh | 35 +++++++++++++++++ 6 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/03008_local_plain_rewritable.reference create mode 100755 tests/queries/0_stateless/03008_local_plain_rewritable.sh diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 7b949db268b..264ec2b258e 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -73,9 +73,17 @@ ObjectStoragePtr createObjectStorage( return std::make_shared>(std::forward(args)...); else if (isPlainRewritableStorage(type, config, config_prefix)) { - /// TODO(jkartseva@): Test support for generic disk type - if (type != ObjectStorageType::S3) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "plain_rewritable metadata storage support is implemented only for S3"); + /// HDFS object storage currently does not support iteration and does not implement listObjects method. + /// StaticWeb object storage is read-only and works with its dedicated metadata type. + constexpr auto supported_object_storage_types + = std::array{ObjectStorageType::S3, ObjectStorageType::Local, ObjectStorageType::Azure}; + if (std::find(supported_object_storage_types.begin(), supported_object_storage_types.end(), type) + == supported_object_storage_types.end()) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "plain_rewritable metadata storage support is not implemented for '{}' object storage", + DataSourceDescription{DataSourceType::ObjectStorage, type, MetadataStorageType::PlainRewritable, /*description*/ ""} + .toString()); return std::make_shared>(std::forward(args)...); } diff --git a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h index d71e995b490..2b116cff443 100644 --- a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h +++ b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h @@ -1,16 +1,26 @@ #pragma once #include +#include +#include "CommonPathPrefixKeyGenerator.h" namespace DB { +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} template class PlainRewritableObjectStorage : public BaseObjectStorage { public: template - explicit PlainRewritableObjectStorage(Args &&... args) : BaseObjectStorage(std::forward(args)...) 
+ explicit PlainRewritableObjectStorage(Args &&... args) + : BaseObjectStorage(std::forward(args)...) + /// A basic key generator is required for checking S3 capabilities, + /// it will be reset later by metadata storage. + , key_generator(createObjectStorageKeysGeneratorAsIsWithPrefix(BaseObjectStorage::getCommonKeyPrefix())) { } @@ -19,6 +29,33 @@ public: bool isWriteOnce() const override { return false; } bool isPlain() const override { return true; } + + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; + + ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; + + void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; } + +private: + ObjectStorageKeysGeneratorPtr key_generator; }; + +template +ObjectStorageKey PlainRewritableObjectStorage::generateObjectKeyForPath(const std::string & path) const +{ + if (!key_generator) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); + + return key_generator->generate(path, /* is_directory */ false); +} + +template +ObjectStorageKey PlainRewritableObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const +{ + if (!key_generator) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); + + return key_generator->generate(path, /* is_directory */ true); +} } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 2eae8877f87..a58b37f1df9 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -574,13 +574,6 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p return key_generator->generate(path, /* is_directory */ false); } -ObjectStorageKey S3ObjectStorage::generateObjectKeyPrefixForDirectoryPath(const std::string & path) const -{ - if (!key_generator) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); - - return key_generator->generate(path, /* is_directory */ true); -} } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ff66b00e47c..5eaab4b585c 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -159,12 +159,9 @@ public: bool supportParallelWrite() const override { return true; } ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override; - ObjectStorageKey generateObjectKeyPrefixForDirectoryPath(const std::string & path) const override; bool isReadOnly() const override { return s3_settings.get()->read_only; } - void setKeysGenerator(ObjectStorageKeysGeneratorPtr gen) override { key_generator = gen; } - private: void setNewSettings(std::unique_ptr && s3_settings_); diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.reference b/tests/queries/0_stateless/03008_local_plain_rewritable.reference new file mode 100644 index 00000000000..10fc932ca4d --- /dev/null +++ b/tests/queries/0_stateless/03008_local_plain_rewritable.reference @@ -0,0 +1,22 @@ +10006 +0 0 0 +1 1 1 +1 2 0 +2 2 2 +2 2 2 +3 1 9 +3 3 3 +4 4 4 +4 7 7 +5 5 5 +10006 +0 0 0 +1 1 1 +1 2 0 +2 2 2 +2 2 2 +3 1 9 +3 3 3 +4 4 4 +4 7 7 +5 5 5 diff --git a/tests/queries/0_stateless/03008_local_plain_rewritable.sh b/tests/queries/0_stateless/03008_local_plain_rewritable.sh new file mode 100755 index 00000000000..07fd013c911 --- /dev/null +++ 
b/tests/queries/0_stateless/03008_local_plain_rewritable.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-replicated-database, no-shared-merge-tree +# Tag no-random-settings: enable after root causing flakiness + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "drop table if exists test_mt sync" + +${CLICKHOUSE_CLIENT} -nm --query " +create table test_mt (a Int32, b Int64, c Int64) engine = MergeTree() partition by intDiv(a, 1000) order by tuple(a, b) +settings disk = disk( + type = object_storage, + object_storage_type = local, + metadata_type = plain_rewritable, + path = '/var/lib/clickhouse/disks/local_plain_rewritable/') +" + +${CLICKHOUSE_CLIENT} -nm --query " +insert into test_mt (*) values (1, 2, 0), (2, 2, 2), (3, 1, 9), (4, 7, 7), (5, 10, 2), (6, 12, 5); +insert into test_mt (*) select number, number, number from numbers_mt(10000); +" + +${CLICKHOUSE_CLIENT} -nm --query " +select count(*) from test_mt; +select (*) from test_mt order by tuple(a, b) limit 10; +" + +${CLICKHOUSE_CLIENT} --query "optimize table test_mt final" + +${CLICKHOUSE_CLIENT} -nm --query " +select count(*) from test_mt; +select (*) from test_mt order by tuple(a, b) limit 10; +" From c6f17b25e47ffcf96ff49f869f5ecd6b67b910b8 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 3 May 2024 03:59:50 +0000 Subject: [PATCH 32/54] plain_rewritable: add integration test for Azure --- .../__init__.py | 0 .../test.py | 153 ++++++++++++++++++ 2 files changed, 153 insertions(+) create mode 100644 tests/integration/test_azure_blob_storage_plain_rewritable/__init__.py create mode 100644 tests/integration/test_azure_blob_storage_plain_rewritable/test.py diff --git a/tests/integration/test_azure_blob_storage_plain_rewritable/__init__.py b/tests/integration/test_azure_blob_storage_plain_rewritable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_azure_blob_storage_plain_rewritable/test.py b/tests/integration/test_azure_blob_storage_plain_rewritable/test.py new file mode 100644 index 00000000000..96d116ec6a2 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_plain_rewritable/test.py @@ -0,0 +1,153 @@ +import logging +import os +import random +import string + +import pytest + +from helpers.cluster import ClickHouseCluster +from azure.storage.blob import BlobServiceClient +from test_storage_azure_blob_storage.test import azure_query + +NODE_NAME = "node" + + +def generate_cluster_def(port): + path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "./_gen/disk_storage_conf.xml", + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + f.write( + f""" + + + + object_storage + azure_blob_storage + plain_rewritable + http://azurite1:{port}/devstoreaccount1 + cont + true + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + 100000 + 100000 + 10 + 10 + + + + + +
+ blob_storage_disk +
+
+
+
+
+
+""" + ) + return path + + +insert_values = [ + "(0,'data'),(1,'data')", + ",".join( + f"({i},'{''.join(random.choices(string.ascii_lowercase, k=5))}')" + for i in range(10) + ), +] + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + port = cluster.azurite_port + path = generate_cluster_def(port) + cluster.add_instance( + NODE_NAME, + main_configs=[ + path, + ], + with_azurite=True, + stay_alive=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_insert_select(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + azure_query( + node, + """ + CREATE TABLE test_{} ( + id Int64, + data String + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='blob_storage_policy' + """.format( + index + ), + ) + + azure_query(node, "INSERT INTO test_{} VALUES {}".format(index, value)) + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + + +def test_restart_server(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + node.restart_clickhouse() + + for index, value in enumerate(insert_values): + assert ( + azure_query( + node, "SELECT * FROM test_{} ORDER BY id FORMAT Values".format(index) + ) + == value + ) + + +def test_drop_table(cluster): + node = cluster.instances[NODE_NAME] + + for index, value in enumerate(insert_values): + node.query("DROP TABLE IF EXISTS test_{} SYNC".format(index)) + + port = cluster.env_variables["AZURITE_PORT"] + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + containers = blob_service_client.list_containers() + for container in containers: + container_client = blob_service_client.get_container_client(container) + assert len(list(container_client.list_blobs())) == 0 From fcad15ffc2b7c5d4d1c9e9ce201ba9eb86d4a3d4 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Sat, 4 May 2024 04:26:48 +0000 Subject: [PATCH 33/54] plain_rewritable: update docs --- docs/en/operations/storing-data.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 389c917d427..7005783dd60 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -371,6 +371,8 @@ is equal to ``` +Starting from `24.5` it is possible configure any object storage disk (`s3`, `azure`, `local`) using `plain_rewritable` metadata type. + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. 
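As a rough sketch of what the `plain_rewritable` note above enables, the inline `disk()` syntax from the stateless test earlier in this series can target S3 instead of a local path. This is only an illustration: the endpoint and credentials are placeholders, and the exact parameter set should be checked against the release in use.

```sql
-- Sketch only: a MergeTree table on an S3-backed disk with plain_rewritable metadata.
-- The endpoint and credentials are placeholders, not working values.
CREATE TABLE test_s3_plain_rewritable (a Int32, b Int64)
ENGINE = MergeTree ORDER BY a
SETTINGS disk = disk(
    type = object_storage,
    object_storage_type = s3,
    metadata_type = plain_rewritable,
    endpoint = 'https://s3.example.com/my-bucket/data/',
    access_key_id = '<key>',
    secret_access_key = '<secret>');
```

Unlike the write-once `plain` metadata type, a `plain_rewritable` disk stays writable, so merges and further inserts keep working (the object storage reports `isWriteOnce() == false`, as in the header change above).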
From 796ffb3a0a60bef212aec0e971d8853fa086f636 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 08:39:33 +0300 Subject: [PATCH 34/54] Update 03147_system_columns_access_checks.sh --- tests/queries/0_stateless/03147_system_columns_access_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index 70693e0985f..0c8aafe16ec 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel +# Tags: no-fasttest, no-parallel, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 5f52cb43b47538d98428dae13ae551c73114ef02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 09:33:47 +0200 Subject: [PATCH 35/54] Fix coverage. --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 8858e12c50e..740fdd2da1f 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -143,7 +143,7 @@ function setup_logs_replication time DateTime COMMENT 'The time of test run', test_name String COMMENT 'The name of the test', coverage Array(UInt64) COMMENT 'An array of addresses of the code (a subset of addresses instrumented for coverage) that were encountered during the test run' - ) ENGINE = Null COMMENT 'Contains information about per-test coverage from the CI, but used only for exporting to the CI cluster' + ) ENGINE = MergeTree ORDER BY test_name COMMENT 'Contains information about per-test coverage from the CI, but used only for exporting to the CI cluster' " # For each system log table: From 20f3bb487812abb793fc0f9c7db3a1142dd5e2e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 10:37:46 +0300 Subject: [PATCH 36/54] Update 03147_system_columns_access_checks.sh --- tests/queries/0_stateless/03147_system_columns_access_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index 0c8aafe16ec..2bd7fb083ea 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, long +# Tags: no-fasttest, no-parallel, no-ordinary-database, long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From da4f6f7b6ce4d7c46f3bd1955352656fd2826f19 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 30 Apr 2024 16:35:57 +0300 Subject: [PATCH 37/54] Added recursive CTE documentation --- .../sql-reference/statements/select/with.md | 235 +++++++++++++++++- 1 file changed, 229 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index a59ef463419..84d3c02eca1 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -5,21 +5,21 @@ sidebar_label: WITH # WITH Clause -ClickHouse supports Common Table Expressions 
([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. +ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case. An example of such behavior is below ``` sql -with cte_numbers as +with cte_numbers as ( - select - num - from generateRandom('num UInt64', NULL) + select + num + from generateRandom('num UInt64', NULL) limit 1000000 ) select - count() + count() from cte_numbers where num in (select num from cte_numbers) ``` @@ -87,3 +87,226 @@ LIMIT 10; WITH test1 AS (SELECT i + 1, j + 1 FROM test1) SELECT * FROM test1; ``` + +# Recursive Queries + +The optional RECURSIVE modifier allows for a WITH query to refer to its own output. Example: + +**Example:** Sum integers from 1 throught 100 + +```sql +WITH RECURSIVE test_table AS ( + SELECT 1 AS number +UNION ALL + SELECT number + 1 FROM test_table WHERE number < 100 +) +SELECT sum(number) FROM test_table; +``` + +``` text +┌─sum(number)─┐ +│ 5050 │ +└─────────────┘ +``` + +The general form of a recursive `WITH` query is always a non-recursive term, then `UNION ALL`, then a recursive term, where only the recursive term can contain a reference to the query's own output. Recursive CTE query is executed as follows: + +1. Evaluate the non-recursive term. Place result of non-recursive term query in a temporary working table. +2. As long as the working table is not empty, repeat these steps: + 1. Evaluate the recursive term, substituting the current contents of the working table for the recursive self-reference. Place result of recursive term query in a temporary intermediate table. + 2. Replace the contents of the working table with the contents of the intermediate table, then empty the intermediate table. + +Recursive queries are typically used to work with hierarchical or tree-structured data. 
For example, we can write a query that performs tree traversal: + +**Example:** Tree traversal + +First let's create tree table: + +```sql +DROP TABLE IF EXISTS tree; +CREATE TABLE tree +( + id UInt64, + parent_id Nullable(UInt64), + data String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO tree VALUES (0, NULL, 'ROOT'), (1, 0, 'Child_1'), (2, 0, 'Child_2'), (3, 1, 'Child_1_1'); +``` + +We can traverse those tree with such query: + +**Example:** Tree traversal +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree; +``` + +```text +┌─id─┬─parent_id─┬─data──────┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ +│ 1 │ 0 │ Child_1 │ +│ 2 │ 0 │ Child_2 │ +│ 3 │ 1 │ Child_1_1 │ +└────┴───────────┴───────────┘ +``` + +## Search order + +To create a depth-first order, we compute for each result row an array of rows that we have already visited: + +**Example:** Tree traversal depth-first order +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data, [t.id] AS path + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id]) + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree ORDER BY path; +``` + +```text +┌─id─┬─parent_id─┬─data──────┬─path────┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │ +│ 1 │ 0 │ Child_1 │ [0,1] │ +│ 3 │ 1 │ Child_1_1 │ [0,1,3] │ +│ 2 │ 0 │ Child_2 │ [0,2] │ +└────┴───────────┴───────────┴─────────┘ +``` + +To create a breadth-first order, standard approach is to add column that tracks the depth of the search: + +**Example:** Tree traversal breadth-first order +```sql +WITH RECURSIVE search_tree AS ( + SELECT id, parent_id, data, [t.id] AS path, toUInt64(0) AS depth + FROM tree t + WHERE t.id = 0 +UNION ALL + SELECT t.id, t.parent_id, t.data, arrayConcat(path, [t.id]), depth + 1 + FROM tree t, search_tree st + WHERE t.parent_id = st.id +) +SELECT * FROM search_tree ORDER BY depth; +``` + +```text +┌─id─┬─link─┬─data──────┬─path────┬─depth─┐ +│ 0 │ ᴺᵁᴸᴸ │ ROOT │ [0] │ 0 │ +│ 1 │ 0 │ Child_1 │ [0,1] │ 1 │ +│ 2 │ 0 │ Child_2 │ [0,2] │ 1 │ +│ 3 │ 1 │ Child_1_1 │ [0,1,3] │ 2 │ +└────┴──────┴───────────┴─────────┴───────┘ +``` + +## Cycle detection + +First let's create graph table: + +```sql +DROP TABLE IF EXISTS graph; +CREATE TABLE graph +( + from UInt64, + to UInt64, + label String +) ENGINE = MergeTree ORDER BY (from, to); + +INSERT INTO graph VALUES (1, 2, '1 -> 2'), (1, 3, '1 -> 3'), (2, 3, '2 -> 3'), (1, 4, '1 -> 4'), (4, 5, '4 -> 5'); +``` + +We can traverse that graph with such query: + +**Example:** Graph traversal without cycle detection +```sql +WITH RECURSIVE search_graph AS ( + SELECT from, to, label FROM graph g + UNION ALL + SELECT g.from, g.to, g.label + FROM graph g, search_graph sg + WHERE g.from = sg.to +) +SELECT DISTINCT * FROM search_graph ORDER BY from; +``` +```text +┌─from─┬─to─┬─label──┐ +│ 1 │ 4 │ 1 -> 4 │ +│ 1 │ 2 │ 1 -> 2 │ +│ 1 │ 3 │ 1 -> 3 │ +│ 2 │ 3 │ 2 -> 3 │ +│ 4 │ 5 │ 4 -> 5 │ +└──────┴────┴────────┘ +``` + +But if we add cycle in that graph, previous query will fail with `Maximum recursive CTE evaluation depth` error: + +```sql +INSERT INTO graph VALUES (5, 1, '5 -> 1'); + +WITH RECURSIVE search_graph AS ( + SELECT from, to, label FROM graph g +UNION ALL + SELECT g.from, g.to, g.label + FROM graph g, search_graph sg + WHERE g.from = sg.to +) +SELECT DISTINCT * FROM search_graph ORDER BY from; +``` + +```text +Code: 
306. DB::Exception: Received from localhost:9000. DB::Exception: Maximum recursive CTE evaluation depth (1000) exceeded, during evaluation of search_graph AS (SELECT from, to, label FROM graph AS g UNION ALL SELECT g.from, g.to, g.label FROM graph AS g, search_graph AS sg WHERE g.from = sg.to). Consider raising max_recursive_cte_evaluation_depth setting.: While executing RecursiveCTESource. (TOO_DEEP_RECURSION) +``` + +The standard method for handling cycles is to compute an array of the already visited nodes: + +**Example:** Graph traversal with cycle detection +```sql +WITH RECURSIVE search_graph AS ( + SELECT from, to, label, false AS is_cycle, [tuple(g.from, g.to)] AS path FROM graph g +UNION ALL + SELECT g.from, g.to, g.label, has(path, tuple(g.from, g.to)), arrayConcat(sg.path, [tuple(g.from, g.to)]) + FROM graph g, search_graph sg + WHERE g.from = sg.to AND NOT is_cycle +) +SELECT * FROM search_graph WHERE is_cycle ORDER BY from; +``` + +```text +┌─from─┬─to─┬─label──┬─is_cycle─┬─path──────────────────────┐ +│ 1 │ 4 │ 1 -> 4 │ true │ [(1,4),(4,5),(5,1),(1,4)] │ +│ 4 │ 5 │ 4 -> 5 │ true │ [(4,5),(5,1),(1,4),(4,5)] │ +│ 5 │ 1 │ 5 -> 1 │ true │ [(5,1),(1,4),(4,5),(5,1)] │ +└──────┴────┴────────┴──────────┴───────────────────────────┘ +``` + +## Infinite queries + +It is also possible to use inifinite recursive CTE queries if `LIMIT` is used in outer query: + +**Example:** Infinite recursive CTE query +```sql +WITH RECURSIVE test_table AS ( + SELECT 1 AS number +UNION ALL + SELECT number + 1 FROM test_table +) +SELECT sum(number) FROM (SELECT number FROM test_table LIMIT 100); +``` + +```text +┌─sum(number)─┐ +│ 5050 │ +└─────────────┘ +``` From 0b59c24866a6e61989b907aed0219530d6503b30 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 May 2024 18:50:38 +0300 Subject: [PATCH 38/54] Fixed style check --- docs/en/sql-reference/statements/select/with.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index 84d3c02eca1..ffde7a3fe54 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -92,7 +92,7 @@ SELECT * FROM test1; The optional RECURSIVE modifier allows for a WITH query to refer to its own output. 
Example: -**Example:** Sum integers from 1 throught 100 +**Example:** Sum integers from 1 through 100 ```sql WITH RECURSIVE test_table AS ( @@ -293,7 +293,7 @@ SELECT * FROM search_graph WHERE is_cycle ORDER BY from; ## Infinite queries -It is also possible to use inifinite recursive CTE queries if `LIMIT` is used in outer query: +It is also possible to use infinite recursive CTE queries if `LIMIT` is used in outer query: **Example:** Infinite recursive CTE query ```sql From a61cb622f10586b519d41c43601c8c4ed428dfef Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 7 May 2024 09:39:19 +0000 Subject: [PATCH 39/54] fix test flaky Signed-off-by: Duc Canh Le --- tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index 8acc83fc86c..45e65b18e07 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -27,9 +27,9 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" # Testing insert with multiple sinks and fixed block size ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -${CLICKHOUSE_CLIENT} --query "ALTER TABLE rocksdb_worm MODIFY SETTING bulk_insert_block_size = 500000;" -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_insert_threads = 2, max_block_size = 100000;" -${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 2 as max_block_size is set to 500000 +# Must set both max_threads and max_insert_threads to 2 to make sure there is only two sinks +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '';" +${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 2 because default bulk sink size is ~1M rows / SST file ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" # Testing insert with duplicated keys From 791278ba47676ef497c95a308eaca91698717f91 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 6 May 2024 21:45:22 +0200 Subject: [PATCH 40/54] Add logging after a failure with evaluating Replicated*MergeTree engine arguments. --- ...tractZooKeeperPathFromReplicatedTableDef.h | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 60 +++++++++++-------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h index 1bd58392201..5ef5e1db62e 100644 --- a/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h +++ b/src/Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h @@ -11,8 +11,9 @@ class ASTCreateQuery; class Context; using ContextPtr = std::shared_ptr; -/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. +/// Extracts a zookeeper path from a specified CREATE TABLE query. 
/// The function checks the table engine and if it is Replicated*MergeTree then it takes the first argument and expands macros in it. +/// Returns std::nullopt if the specified CREATE query doesn't describe a Replicated table or its arguments can't be evaluated. std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & create_query, const ContextPtr & context); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9b0200d5a1c..4244ccccfe0 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -296,9 +296,6 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); - if (replica_name.empty()) - throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); - expand_macro(ast_zk_path, ast_replica_name); } else if (is_extended_storage_def @@ -332,38 +329,45 @@ static void extractZooKeeperPathAndReplicaNameFromEngineArgs( throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); } -/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if fails. +/// Extracts a zookeeper path from a specified CREATE TABLE query. std::optional extractZooKeeperPathFromReplicatedTableDef(const ASTCreateQuery & query, const ContextPtr & context) { + if (!query.storage || !query.storage->engine) + return {}; + + const String & engine_name = query.storage->engine->name; + if (!isReplicated(engine_name)) + return {}; + + StorageID table_id{query.getDatabase(), query.getTable(), query.uuid}; + + ASTs engine_args; + if (query.storage->engine->arguments) + engine_args = query.storage->engine->arguments->children; + for (auto & engine_arg : engine_args) + engine_arg = engine_arg->clone(); + + LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE; + String zookeeper_path; + String replica_name; + RenamingRestrictions renaming_restrictions; + try { - if (!query.storage || !query.storage->engine) - return {}; - - const String & engine_name = query.storage->engine->name; - if (!isReplicated(engine_name)) - return {}; - - StorageID table_id{query.getDatabase(), query.getTable(), query.uuid}; - ASTs engine_args; - if (query.storage->engine->arguments) - engine_args = query.storage->engine->arguments->children; - for (auto & engine_arg : engine_args) - engine_arg = engine_arg->clone(); - LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE; - String zookeeper_path; - String replica_name; - RenamingRestrictions renaming_restrictions; - extractZooKeeperPathAndReplicaNameFromEngineArgs(query, table_id, engine_name, engine_args, mode, context, zookeeper_path, replica_name, renaming_restrictions); - - return zookeeper_path; } - catch (...) 
+ catch (Exception & e) { - return {}; + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + { + tryLogCurrentException(__PRETTY_FUNCTION__, "Couldn't evaluate engine arguments"); + return {}; + } + throw; } + + return zookeeper_path; } static StoragePtr create(const StorageFactory::Arguments & args) @@ -539,6 +543,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) { extractZooKeeperPathAndReplicaNameFromEngineArgs(args.query, args.table_id, args.engine_name, args.engine_args, args.mode, args.getLocalContext(), zookeeper_path, replica_name, renaming_restrictions); + + if (replica_name.empty()) + throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); + arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. arg_num = 2; /// zookeeper_path and replica_name together are always two arguments. } From 1bae2d9d4ffa6b4757dc2aeccb9eccf89bebc072 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 7 May 2024 12:57:14 +0200 Subject: [PATCH 41/54] update comment --- src/Storages/StorageS3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 8a4e30fed1d..e65d0cb5be4 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -489,7 +489,8 @@ size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() { /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily); /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do - /// as it would lead to serious (up to times) reading performance degradation. + /// as it would lead to serious slow down of the execution, since objects are going + /// to be fetched sequentially rather than in-parallel with up to times. 
return std::numeric_limits::max(); } else From 463f95e956b62a98c3817e27943c753123ebb2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 7 May 2024 13:47:35 +0200 Subject: [PATCH 42/54] Extra constraints for stress and fuzzer tests --- .../test/fuzzer/query-fuzzer-tweaks-users.xml | 5 ++++ docker/test/stateless/stress_tests.lib | 30 ++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index c31d2fd7f39..ad261be1abe 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -31,6 +31,11 @@ + + + + + diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index a0865f638a1..6aaddbfe590 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -120,13 +120,41 @@ EOL local max_users_mem max_users_mem=$((total_mem*30/100)) # 30% + # Similar to docker/test/fuzzer/query-fuzzer-tweaks-users.xml echo "Setting max_memory_usage_for_user=$max_users_mem and max_memory_usage for queries to 10G" - cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < /etc/clickhouse-server/users.d/stress_test_tweaks-users.xml < + 60 10G ${max_users_mem} + + 200 + + + + 60 + + + + 10G + + + + 200 + + + + + + + + + + + + From 0609054e9849b915847cbd93cb3d76786eebe0cc Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 7 May 2024 21:09:33 +0800 Subject: [PATCH 43/54] [update] update a stateless case --- .../02187_async_inserts_all_formats. | 52 +++++++++++++++++++ .../02187_async_inserts_all_formats.reference | 3 ++ 2 files changed, 55 insertions(+) create mode 100644 tests/queries/0_stateless/02187_async_inserts_all_formats. diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats. b/tests/queries/0_stateless/02187_async_inserts_all_formats. new file mode 100644 index 00000000000..f42a6d39d4f --- /dev/null +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats. 
@@ -0,0 +1,52 @@ +Arrow +ArrowStream +Avro +BSONEachRow +CSV +CSVWithNames +CSVWithNamesAndTypes +CustomSeparated +CustomSeparatedWithNames +CustomSeparatedWithNamesAndTypes +JSON +JSONColumns +JSONColumnsWithMetadata +JSONCompact +JSONCompactColumns +JSONCompactEachRow +JSONCompactEachRowWithNames +JSONCompactEachRowWithNamesAndTypes +JSONCompactStringsEachRow +JSONCompactStringsEachRowWithNames +JSONCompactStringsEachRowWithNamesAndTypes +JSONEachRow +JSONLines +JSONObjectEachRow +JSONStringsEachRow +MsgPack +NDJSON +Native +ORC +Parquet +Raw +RawWithNames +RawWithNamesAndTypes +RowBinary +RowBinaryWithNames +RowBinaryWithNamesAndTypes +TSKV +TSV +TSVRaw +TSVRawWithNames +TSVRawWithNamesAndTypes +TSVWithNames +TSVWithNamesAndTypes +TabSeparated +TabSeparatedRaw +TabSeparatedRawWithNames +TabSeparatedRawWithNamesAndTypes +TabSeparatedWithNames +TabSeparatedWithNamesAndTypes +Values +LineAsString +OK diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference index 2de728b4cb4..f42a6d39d4f 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.reference +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.reference @@ -28,6 +28,9 @@ NDJSON Native ORC Parquet +Raw +RawWithNames +RawWithNamesAndTypes RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes From 85f766f27c0cbd49e267334c3d184627b554e853 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 7 May 2024 21:11:45 +0800 Subject: [PATCH 44/54] [update] update a stateless case --- .../02187_async_inserts_all_formats. | 52 ------------------- 1 file changed, 52 deletions(-) delete mode 100644 tests/queries/0_stateless/02187_async_inserts_all_formats. diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats. b/tests/queries/0_stateless/02187_async_inserts_all_formats. deleted file mode 100644 index f42a6d39d4f..00000000000 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats. 
+++ /dev/null @@ -1,52 +0,0 @@ -Arrow -ArrowStream -Avro -BSONEachRow -CSV -CSVWithNames -CSVWithNamesAndTypes -CustomSeparated -CustomSeparatedWithNames -CustomSeparatedWithNamesAndTypes -JSON -JSONColumns -JSONColumnsWithMetadata -JSONCompact -JSONCompactColumns -JSONCompactEachRow -JSONCompactEachRowWithNames -JSONCompactEachRowWithNamesAndTypes -JSONCompactStringsEachRow -JSONCompactStringsEachRowWithNames -JSONCompactStringsEachRowWithNamesAndTypes -JSONEachRow -JSONLines -JSONObjectEachRow -JSONStringsEachRow -MsgPack -NDJSON -Native -ORC -Parquet -Raw -RawWithNames -RawWithNamesAndTypes -RowBinary -RowBinaryWithNames -RowBinaryWithNamesAndTypes -TSKV -TSV -TSVRaw -TSVRawWithNames -TSVRawWithNamesAndTypes -TSVWithNames -TSVWithNamesAndTypes -TabSeparated -TabSeparatedRaw -TabSeparatedRawWithNames -TabSeparatedRawWithNamesAndTypes -TabSeparatedWithNames -TabSeparatedWithNamesAndTypes -Values -LineAsString -OK From 54418fcea24fbe1e713ccf3dfbde2db0ea046ec8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 7 May 2024 16:59:29 +0300 Subject: [PATCH 45/54] Update src/Storages/System/StorageSystemColumns.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Storages/System/StorageSystemColumns.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 15b73fd1884..74b44cc0a2d 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -419,7 +419,7 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, /// Add `table` column. MutableColumnPtr table_column_mut = ColumnString::create(); - size_t num_databases = database_column->size(); + const auto num_databases = database_column->size(); IColumn::Offsets offsets(num_databases); for (size_t i = 0; i < num_databases; ++i) From cad9c97725e4943730d0dabaa3df2cdf008be948 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 7 May 2024 17:21:25 +0300 Subject: [PATCH 46/54] Analyzer setting max_streams_to_max_threads_ratio overflow fix --- .../AggregateFunctionSparkbar.cpp | 4 ++-- src/Planner/PlannerJoinTree.cpp | 10 +++++++++- ...streams_to_max_threads_ratio_overflow.reference | 0 ...g_max_streams_to_max_threads_ratio_overflow.sql | 14 ++++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.reference create mode 100644 tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.cpp b/src/AggregateFunctions/AggregateFunctionSparkbar.cpp index b6e538520a8..362ffbe20d2 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.cpp +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.cpp @@ -253,9 +253,9 @@ private: else { Y scaled; - bool has_overfllow = common::mulOverflow(y, levels_num, scaled); + bool has_overflow = common::mulOverflow(y, levels_num, scaled); - if (has_overfllow) + if (has_overflow) y = y / (y_max / levels_num) + 1; else y = scaled / y_max + 1; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 85cad1dcd69..094cf73dbc6 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -708,7 +708,15 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres /// If necessary, we 
request more sources than the number of threads - to distribute the work evenly over the threads if (max_streams > 1 && !is_sync_remote) - max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); + { + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; canConvertTo(streams_with_ratio)) + max_streams = static_cast(streams_with_ratio); + else + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. " + "Make sure that `max_streams * max_streams_to_max_threads_ratio` is in some reasonable boundaries, current value: {}", + streams_with_ratio); + } if (table_node) table_expression_query_info.table_expression_modifiers = table_node->getTableExpressionModifiers(); diff --git a/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.reference b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql new file mode 100644 index 00000000000..af326c15bd8 --- /dev/null +++ b/tests/queries/0_stateless/03148_setting_max_streams_to_max_threads_ratio_overflow.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE = MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, 'Value_0'); + +SELECT * FROM test_table SETTINGS max_threads = 1025, max_streams_to_max_threads_ratio = -9223372036854775808, allow_experimental_analyzer = 1; -- { serverError PARAMETER_OUT_OF_BOUND } + +SELECT * FROM test_table SETTINGS max_threads = 1025, max_streams_to_max_threads_ratio = -9223372036854775808, allow_experimental_analyzer = 0; -- { serverError PARAMETER_OUT_OF_BOUND } + +DROP TABLE test_table; From 07472b3e95b8c0beceb8efc177872bb049faf6c6 Mon Sep 17 00:00:00 2001 From: Constantine Peresypkin Date: Wed, 10 Apr 2024 19:54:29 -0400 Subject: [PATCH 47/54] Add setting to force NULL for omitted fields Fixes #60884 --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Impl/BSONEachRowRowInputFormat.cpp | 9 +++- .../Impl/JSONColumnsBlockInputFormatBase.cpp | 3 ++ .../Impl/JSONEachRowRowInputFormat.cpp | 10 ++++- .../Formats/Impl/TSKVRowInputFormat.cpp | 12 ++++- .../RowInputFormatWithNamesAndTypes.cpp | 20 +++++++++ .../03004_force_null_for_omitted.reference | 44 +++++++++++++++++++ .../03004_force_null_for_omitted.sql | 36 +++++++++++++++ 11 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03004_force_null_for_omitted.reference create mode 100644 tests/queries/0_stateless/03004_force_null_for_omitted.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b4313d9af56..f80bf1e4e3e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1006,6 +1006,7 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ + M(Bool, 
input_format_force_null_for_omitted_fields, false, "Force initialize omitted fields with null values", 0) \ M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index cd1cd341c29..5ea99aa0192 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -91,6 +91,7 @@ static std::map sett {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index b7e9899da46..3199445864d 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -146,6 +146,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; format_settings.null_as_default = settings.input_format_null_as_default; + format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size; format_settings.parquet.row_group_bytes = settings.output_format_parquet_row_group_size_bytes; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index da225a39ec9..83b5c534297 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,6 +32,7 @@ struct FormatSettings bool write_statistics = true; bool import_nested_json = false; bool null_as_default = true; + bool force_null_for_omitted_fields = false; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; bool is_writing_to_terminal = false; diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 340bcc8aae5..6a3475a1830 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int TOO_LARGE_STRING_SIZE; extern const int UNKNOWN_TYPE; + extern const int TYPE_MISMATCH; } namespace @@ -820,7 +821,13 @@ bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi /// Fill non-visited columns with the default values. 
for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column of type '{}' at index {}", type->getName(), i); + else + type->insertDefaultInto(*columns[i]); + } if (format_settings.defaults_for_omitted_fields) ext.read_columns = read_columns; diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index faa4f36bbb0..e61e55efc8e 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; + extern const int TYPE_MISMATCH; } @@ -194,6 +195,8 @@ Chunk JSONColumnsBlockInputFormatBase::read() { if (!seen_columns[i]) { + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(fields[i].type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column `{}` of type '{}'", fields[i].name, fields[i].type->getName()); columns[i]->insertManyDefaults(rows); if (format_settings.defaults_for_omitted_fields) block_missing_values.setBits(i, rows); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index a78d8d016cd..8855a1bc28d 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int CANNOT_READ_ALL_DATA; extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; } namespace @@ -233,7 +234,14 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi /// Fill non-visited columns with the default values. for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column `{}` of type '{}'", columnName(i), type->getName()); + else + type->insertDefaultInto(*columns[i]); + } + /// Return info about defaults set. /// If defaults_for_omitted_fields is set to 0, we should just leave already inserted defaults. diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 29bc0012dc0..5382527fcdc 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; extern const int CANNOT_READ_ALL_DATA; extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int TYPE_MISMATCH; } @@ -190,7 +191,16 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// Fill in the not met columns with default values. 
for (size_t i = 0; i < num_columns; ++i) if (!seen_columns[i]) - header.getByPosition(i).type->insertDefaultInto(*columns[i]); + { + const auto & type = header.getByPosition(i).type; + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(type)) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column `{}` of type '{}'", + header.getByPosition(i).name, + type->getName()); + type->insertDefaultInto(*columns[i]); + } /// return info about defaults set if (format_settings.defaults_for_omitted_fields) diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 2ad6a825c8f..ae30d741c2f 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int INCORRECT_DATA; extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; } namespace @@ -124,6 +125,17 @@ void RowInputFormatWithNamesAndTypes::readPrefix() } } } + + if (format_settings.force_null_for_omitted_fields) + { + for (auto index : column_mapping->not_presented_columns) + if (!isNullableOrLowCardinalityNullable(data_types[index])) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column type '{}' at index {}", + data_types[index]->getName(), + index); + } } void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) @@ -217,7 +229,15 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE { const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; if (rem_column_index) + { + if (format_settings.force_null_for_omitted_fields && !isNullableOrLowCardinalityNullable(data_types[*rem_column_index])) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Cannot insert NULL value into a column type '{}' at index {}", + data_types[*rem_column_index]->getName(), + *rem_column_index); columns[*rem_column_index]->insertDefault(); + } ++file_column; } break; diff --git a/tests/queries/0_stateless/03004_force_null_for_omitted.reference b/tests/queries/0_stateless/03004_force_null_for_omitted.reference new file mode 100644 index 00000000000..a4c928aae8c --- /dev/null +++ b/tests/queries/0_stateless/03004_force_null_for_omitted.reference @@ -0,0 +1,44 @@ +0 0 +0 0 +2 0 +0 0 +4 0 +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 \N +0 \N +2 \N +0 \N +4 \N +0 +0 \N +1 \N +1 \N +1 \N +1 \N +1 0 +1 \N +1 \N +1 2 +3 0 +1 0 +1 \N +1 \N +1 2 +3 0 +1 0 +1 \N +1 \N diff --git a/tests/queries/0_stateless/03004_force_null_for_omitted.sql b/tests/queries/0_stateless/03004_force_null_for_omitted.sql new file mode 100644 index 00000000000..43ba2568acb --- /dev/null +++ b/tests/queries/0_stateless/03004_force_null_for_omitted.sql @@ -0,0 +1,36 @@ +set allow_suspicious_low_cardinality_types = 1; +insert into function file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null Nullable(UInt32)') select number % 2 ? 
NULL : number from numbers(5) settings engine_file_truncate_on_insert=1; +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo UInt32'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo UInt32') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo Nullable(UInt32)'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo Nullable(UInt32)') settings input_format_force_null_for_omitted_fields = 1; +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo LowCardinality(Nullable(UInt32))'); +select * from file(concat(currentDatabase(), '.03004_data.bsonEachRow'), auto, 'null UInt32, foo LowCardinality(Nullable(UInt32))') settings input_format_force_null_for_omitted_fields = 1; + +select * from format(JSONEachRow, 'foo UInt32', '{}'); +select * from format(JSONEachRow, 'foo UInt32', '{}') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{}'); +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{\"foo\":1}'); +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{}') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(JSONEachRow, 'foo UInt32, bar Nullable(UInt32)', '{\"foo\":1}') settings input_format_force_null_for_omitted_fields = 1; +select * from format(JSONEachRow, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', '{\"foo\":1}'); +select * from format(JSONEachRow, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', '{\"foo\":1}') settings input_format_force_null_for_omitted_fields = 1; + +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1'); +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar Nullable(UInt32)', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo,bar\nUInt32,UInt32\n1,2\n3\n') settings input_format_csv_allow_variable_number_of_columns = 1; +select * from format(CSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo,bar\nUInt32,UInt32\n1,2\n3\n') settings input_format_csv_allow_variable_number_of_columns = 1, input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } + +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1'); +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar Nullable(UInt32)', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo\nUInt32\n1') settings input_format_force_null_for_omitted_fields = 1; +select * 
from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\tbar\nUInt32\tUInt32\n1\t2\n3\n') settings input_format_tsv_allow_variable_number_of_columns = 1; +select * from format(TSVWithNamesAndTypes, 'foo UInt32, bar UInt32', 'foo\tbar\nUInt32\tUInt32\n1\t2\n3\n') settings input_format_tsv_allow_variable_number_of_columns = 1, input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } + +select * from format(TSKV, 'foo UInt32, bar UInt32', 'foo=1\n'); +select * from format(TSKV, 'foo UInt32, bar UInt32', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; -- { serverError TYPE_MISMATCH } +select * from format(TSKV, 'foo UInt32, bar Nullable(UInt32)', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; +select * from format(TSKV, 'foo UInt32, bar LowCardinality(Nullable(UInt32))', 'foo=1\n') settings input_format_force_null_for_omitted_fields = 1; From b2377c3fefe8951158de201ea399485f6805f955 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 7 May 2024 15:31:35 +0000 Subject: [PATCH 48/54] Fix mysql dictionary source --- src/Dictionaries/ExternalQueryBuilder.cpp | 2 +- .../test_dictionaries_mysql/test.py | 38 ++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 792c4e3e907..a31301cd0f3 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -401,7 +401,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( { writeString("SELECT * FROM (", out); writeString(query, out); - writeString(") WHERE ", out); + writeString(") AS subquery WHERE ", out); composeKeysCondition(key_columns, requested_rows, method, partition_key_prefix, out); writeString(";", out); diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 360456b2046..332f4ca11bb 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -76,7 +76,7 @@ def test_mysql_dictionaries_custom_query_full_load(started_cluster): query = instance.query query( - """ + f""" CREATE DICTIONARY test_dictionary_custom_query ( id UInt64, @@ -95,12 +95,46 @@ def test_mysql_dictionaries_custom_query_full_load(started_cluster): """ ) - result = query("SELECT id, value_1, value_2 FROM test_dictionary_custom_query") + result = query( + "SELECT dictGetString('test_dictionary_custom_query', 'value_1', toUInt64(1))" + ) + assert result == "Value_1\n" + result = query("SELECT id, value_1, value_2 FROM test_dictionary_custom_query") assert result == "1\tValue_1\tValue_2\n" query("DROP DICTIONARY test_dictionary_custom_query;") + query( + f""" + CREATE DICTIONARY test_cache_dictionary_custom_query + ( + id1 UInt64, + id2 UInt64, + value_concat String + ) + PRIMARY KEY id1, id2 + LAYOUT(COMPLEX_KEY_CACHE(SIZE_IN_CELLS 10)) + SOURCE(MYSQL( + HOST 'mysql80' + PORT 3306 + USER 'root' + PASSWORD 'clickhouse' + QUERY 'SELECT id AS id1, id + 1 AS id2, CONCAT_WS(" ", "The", value_1) AS value_concat FROM test.test_table_1')) + LIFETIME(0) + """ + ) + + result = query( + "SELECT dictGetString('test_cache_dictionary_custom_query', 'value_concat', (1, 2))" + ) + assert result == "The Value_1\n" + + result = query("SELECT id1, value_concat FROM test_cache_dictionary_custom_query") + assert result == "1\tThe Value_1\n" + + query("DROP DICTIONARY test_cache_dictionary_custom_query;") + execute_mysql_query(mysql_connection, 
"DROP TABLE test.test_table_1;") execute_mysql_query(mysql_connection, "DROP TABLE test.test_table_2;") From 836cf150b5b4a9625aee0d440a0d64a966b4c4e0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 7 May 2024 17:39:04 +0200 Subject: [PATCH 49/54] Fix --- src/Disks/StoragePolicy.cpp | 13 ++++++---- .../test_disk_over_web_server/test.py | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 390afb368f8..ccdc34d5d06 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -462,15 +462,18 @@ StoragePolicySelectorPtr StoragePolicySelector::updateFromConfig(const Poco::Uti /// First pass, check. for (const auto & [name, policy] : policies) { - if (name.starts_with(TMP_STORAGE_POLICY_PREFIX)) - continue; + if (!name.starts_with(TMP_STORAGE_POLICY_PREFIX)) + { + if (!result->policies.contains(name)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage policy {} is missing in new configuration", backQuote(name)); - if (!result->policies.contains(name)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage policy {} is missing in new configuration", backQuote(name)); + policy->checkCompatibleWith(result->policies[name]); + } - policy->checkCompatibleWith(result->policies[name]); for (const auto & disk : policy->getDisks()) + { disks_before_reload.insert(disk->getName()); + } } /// Second pass, load. diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 8ddc1ff3c31..15b26c6b09e 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -40,6 +40,12 @@ def cluster(): image="clickhouse/clickhouse-server", tag=CLICKHOUSE_CI_MIN_TESTED_VERSION, ) + cluster.add_instance( + "node5", + main_configs=["configs/storage_conf.xml"], + with_nginx=True, + allow_analyzer=False, + ) cluster.start() @@ -390,3 +396,21 @@ def test_page_cache(cluster): node.query("DROP TABLE test{} SYNC".format(i)) print(f"Ok {i}") + + +def test_config_reload(cluster): + node1 = cluster.instances["node5"] + table_name = "config_reload" + + global uuids + node1.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} UUID '{uuids[0]}' + (id Int32) ENGINE = MergeTree() ORDER BY id + SETTINGS disk = disk(type=web, endpoint='http://nginx:80/test1/'); + """ + ) + + node1.query("SYSTEM RELOAD CONFIG") + node1.query(f"DROP TABLE {table_name} SYNC") From 511146c99c7d0c92802052643ae71e6f3f4c6dad Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 7 May 2024 19:51:47 +0200 Subject: [PATCH 50/54] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 955e2f5b72f..f40c42c4462 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,6 @@ * A mode for `topK`/`topkWeighed` support mode, which return count of values and its error. [#54508](https://github.com/ClickHouse/ClickHouse/pull/54508) ([UnamedRus](https://github.com/UnamedRus)). * Added function `toMillisecond` which returns the millisecond component for values of type`DateTime` or `DateTime64`. [#60281](https://github.com/ClickHouse/ClickHouse/pull/60281) ([Shaun Struwig](https://github.com/Blargian)). * Allow configuring HTTP redirect handlers for clickhouse-server. For example, you can make `/` redirect to the Play UI. 
[#60390](https://github.com/ClickHouse/ClickHouse/pull/60390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)) #### Performance Improvement * Optimized function `dotProduct` to omit unnecessary and expensive memory copies. [#60928](https://github.com/ClickHouse/ClickHouse/pull/60928) ([Robert Schulze](https://github.com/rschu1ze)). From f52dfd98aa0ff7d1c037da02fdf2cf402e7ad3a6 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 7 May 2024 16:31:21 +0000 Subject: [PATCH 51/54] add test for 49307 --- .../03148_mutations_virtual_columns.reference | 1 + .../03148_mutations_virtual_columns.sql | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03148_mutations_virtual_columns.reference create mode 100644 tests/queries/0_stateless/03148_mutations_virtual_columns.sql diff --git a/tests/queries/0_stateless/03148_mutations_virtual_columns.reference b/tests/queries/0_stateless/03148_mutations_virtual_columns.reference new file mode 100644 index 00000000000..7c5e8041147 --- /dev/null +++ b/tests/queries/0_stateless/03148_mutations_virtual_columns.reference @@ -0,0 +1 @@ +2 all_2_2_0 diff --git a/tests/queries/0_stateless/03148_mutations_virtual_columns.sql b/tests/queries/0_stateless/03148_mutations_virtual_columns.sql new file mode 100644 index 00000000000..045869b224a --- /dev/null +++ b/tests/queries/0_stateless/03148_mutations_virtual_columns.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS t_mut_virtuals; + +CREATE TABLE t_mut_virtuals (id UInt64, s String) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_mut_virtuals VALUES (1, 'a'); +INSERT INTO t_mut_virtuals VALUES (2, 'b'); + +SET insert_keeper_fault_injection_probability = 0; +SET mutations_sync = 2; + +ALTER TABLE t_mut_virtuals UPDATE s = _part WHERE 1; +ALTER TABLE t_mut_virtuals DELETE WHERE _part LIKE 'all_1_1_0%'; + +SELECT * FROM t_mut_virtuals ORDER BY id; + +DROP TABLE t_mut_virtuals; From 95b76bf6a47f0e23d41ce33c2223cee93066ad3e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 7 May 2024 20:58:19 +0200 Subject: [PATCH 52/54] Remove leftovers of GCC support in cmake rules Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 119 +++++++++------------ cmake/linux/default_libs.cmake | 16 ++- cmake/sanitize.cmake | 8 +- cmake/tools.cmake | 85 +++++---------- cmake/warnings.cmake | 66 ++++++------ contrib/capnproto-cmake/CMakeLists.txt | 4 +- contrib/openssl-cmake/CMakeLists.txt | 10 +- contrib/sentry-native-cmake/CMakeLists.txt | 2 +- 8 files changed, 125 insertions(+), 185 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be804a14765..abbc48ab23a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,23 +135,21 @@ endif () include (cmake/check_flags.cmake) include (cmake/add_warning.cmake) -if (COMPILER_CLANG) - # generate ranges for fast "addr2line" search - if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") - # NOTE: that clang has a bug because of it does not emit .debug_aranges - # with ThinLTO, so custom ld.lld wrapper is shipped in docker images. 
- set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") - endif () - - # See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") - endif() - - no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16 +# generate ranges for fast "addr2line" search +if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") + # NOTE: that clang has a bug because of it does not emit .debug_aranges + # with ThinLTO, so custom ld.lld wrapper is shipped in docker images. + set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") endif () +# See https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") +endif() + +no_warning(enum-constexpr-conversion) # breaks Protobuf in clang-16 + option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF) @@ -284,16 +282,12 @@ endif () option (ENABLE_BUILD_PROFILING "Enable profiling of build time" OFF) if (ENABLE_BUILD_PROFILING) - if (COMPILER_CLANG) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ftime-trace") - if (LINKER_NAME MATCHES "lld") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--time-trace") - set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--time-trace") - endif () - else () - message (${RECONFIGURE_MESSAGE_LEVEL} "Build profiling is only available with CLang") - endif () + if (LINKER_NAME MATCHES "lld") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--time-trace") + set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--time-trace") + endif () endif () set (CMAKE_CXX_STANDARD 23) @@ -304,22 +298,20 @@ set (CMAKE_C_STANDARD 11) set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C set (CMAKE_C_STANDARD_REQUIRED ON) -if (COMPILER_CLANG) - # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. - # See https://reviews.llvm.org/D112921 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") +# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. +# See https://reviews.llvm.org/D112921 +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") - # falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable - # benchmarks. - set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") +# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable +# benchmarks. +set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") - if (ARCH_AMD64) - # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, - # which makes benchmark results more stable. 
-        set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
-        set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
-    endif()
-endif ()
+if (ARCH_AMD64)
+    # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change,
+    # which makes benchmark results more stable.
+    set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries")
+    set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}")
+endif()
 
 # Disable floating-point expression contraction in order to get consistent floating point calculation results across platforms
 set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffp-contract=off")
@@ -348,39 +340,34 @@ set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} $
 set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
 set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
 
-if (COMPILER_CLANG)
-    if (OS_DARWIN)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
-    endif()
+if (OS_DARWIN)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-U,_inside_main")
+endif()
 
-    # Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")
+# Display absolute paths in error messages. Otherwise KDevelop fails to navigate to correct file and opens a new file instead.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")
 
-    if (NOT ENABLE_TESTS AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX)
-        # https://clang.llvm.org/docs/ThinLTO.html
-        # Applies to clang and linux only.
-        # Disabled when building with tests or sanitizers.
-        option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
-    endif()
+if (NOT ENABLE_TESTS AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX)
+    # https://clang.llvm.org/docs/ThinLTO.html
+    # Applies to clang and linux only.
+    # Disabled when building with tests or sanitizers.
+    option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
+endif()
 
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
-
-    # We cannot afford to use LTO when compiling unit tests, and it's not enough
-    # to only supply -fno-lto at the final linking stage. So we disable it
-    # completely.
-    if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
-        # Link time optimization
-        set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
-        set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
-        set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
-    elseif (ENABLE_THINLTO)
-        message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO")
-    endif ()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
+# We cannot afford to use LTO when compiling unit tests, and it's not enough
+# to only supply -fno-lto at the final linking stage. So we disable it
+# completely.
+if (ENABLE_THINLTO AND NOT ENABLE_TESTS AND NOT SANITIZE)
+    # Link time optimization
+    set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
+    set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
+    set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -flto=thin -fwhole-program-vtables")
 elseif (ENABLE_THINLTO)
-    message (${RECONFIGURE_MESSAGE_LEVEL} "ThinLTO is only available with Clang")
+    message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot enable ThinLTO")
 endif ()
 
 # Turns on all external libs like s3, kafka, ODBC, ...
diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake
index e5ca8e296fc..4a06243243e 100644
--- a/cmake/linux/default_libs.cmake
+++ b/cmake/linux/default_libs.cmake
@@ -5,17 +5,15 @@ set (DEFAULT_LIBS "-nodefaultlibs")
 
 # We need builtins from Clang's RT even without libcxx - for ubsan+int128.
 # See https://bugs.llvm.org/show_bug.cgi?id=16404
-if (COMPILER_CLANG)
-    execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
+execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
 
-    # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both.
-    if (SANITIZE STREQUAL undefined)
-        string(REPLACE "builtins.a" "ubsan_standalone_cxx.a" EXTRA_BUILTINS_LIBRARY "${BUILTINS_LIBRARY}")
-    endif ()
+# Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both.
+if (SANITIZE STREQUAL undefined)
+    string(REPLACE "builtins.a" "ubsan_standalone_cxx.a" EXTRA_BUILTINS_LIBRARY "${BUILTINS_LIBRARY}")
+endif ()
 
-    if (NOT EXISTS "${BUILTINS_LIBRARY}")
-        set (BUILTINS_LIBRARY "-lgcc")
-    endif ()
+if (NOT EXISTS "${BUILTINS_LIBRARY}")
+    set (BUILTINS_LIBRARY "-lgcc")
 endif ()
 
 if (OS_ANDROID)
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index a3523203912..08716c1196b 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -26,9 +26,7 @@ if (SANITIZE)
 
     elseif (SANITIZE STREQUAL "thread")
         set (TSAN_FLAGS "-fsanitize=thread")
-        if (COMPILER_CLANG)
-            set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt")
-        endif()
+        set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/tsan_ignorelist.txt")
 
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
@@ -44,9 +42,7 @@ if (SANITIZE)
             # that's why we often receive reports about UIO. The simplest way to avoid this is just set this flag here.
             set(UBSAN_FLAGS "${UBSAN_FLAGS} -fno-sanitize=unsigned-integer-overflow")
         endif()
-        if (COMPILER_CLANG)
-            set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt")
-        endif()
+        set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/tests/ubsan_ignorelist.txt")
 
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
diff --git a/cmake/tools.cmake b/cmake/tools.cmake
index 1ba3007b0f3..024505411a3 100644
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@@ -1,10 +1,6 @@
 # Compiler
 
-if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
-    set (COMPILER_CLANG 1) # Safe to treat AppleClang as a regular Clang, in general.
-elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set (COMPILER_CLANG 1)
-else ()
+if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang")
     message (FATAL_ERROR "Compiler ${CMAKE_CXX_COMPILER_ID} is not supported")
 endif ()
 
@@ -17,30 +13,26 @@ set (CLANG_MINIMUM_VERSION 16)
 set (XCODE_MINIMUM_VERSION 12.0)
 set (APPLE_CLANG_MINIMUM_VERSION 12.0.0)
 
-if (COMPILER_CLANG)
-    if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
-        # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it.
-        if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG})
-            message (FATAL_ERROR "Compilation with AppleClang is unsupported. Please use vanilla Clang, e.g. from Homebrew.")
-        endif ()
+if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
+    # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it.
+    if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG})
+        message (FATAL_ERROR "Compilation with AppleClang is unsupported. Please use vanilla Clang, e.g. from Homebrew.")
+    endif ()
 
-        # For a mapping between XCode / AppleClang / vanilla Clang versions, see https://en.wikipedia.org/wiki/Xcode
-        if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION})
-            message (FATAL_ERROR "Compilation with AppleClang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).")
-        endif ()
-    else ()
-        if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
-            message (FATAL_ERROR "Compilation with Clang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${CLANG_MINIMUM_VERSION}.")
-        endif ()
+    # For a mapping between XCode / AppleClang / vanilla Clang versions, see https://en.wikipedia.org/wiki/Xcode
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION})
+        message (FATAL_ERROR "Compilation with AppleClang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).")
+    endif ()
+else ()
+    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
+        message (FATAL_ERROR "Compilation with Clang version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${CLANG_MINIMUM_VERSION}.")
     endif ()
 endif ()
 
-# Linker
-
 string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
 list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
 
-# Example values: `lld-10`
+# Linker
 option (LINKER_NAME "Linker name or full path")
 
 if (LINKER_NAME MATCHES "gold")
@@ -48,19 +40,15 @@ if (LINKER_NAME MATCHES "gold")
 endif ()
 
 if (NOT LINKER_NAME)
-    if (COMPILER_CLANG)
-        if (OS_LINUX AND NOT ARCH_S390X)
-            find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld")
-        elseif (OS_DARWIN)
-            find_program (LLD_PATH NAMES "ld")
-        endif ()
+    if (OS_LINUX AND NOT ARCH_S390X)
+        find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld")
+    elseif (OS_DARWIN)
+        find_program (LLD_PATH NAMES "ld")
     endif ()
 
     if (LLD_PATH)
         if (OS_LINUX OR OS_DARWIN)
-            if (COMPILER_CLANG)
-                # Clang driver simply allows full linker path.
-                set (LINKER_NAME ${LLD_PATH})
-            endif ()
+            # Clang driver simply allows full linker path.
+            set (LINKER_NAME ${LLD_PATH})
         endif ()
     endif()
 endif()
@@ -82,47 +70,28 @@ else ()
 endif ()
 
 # Archiver
-
-if (COMPILER_CLANG)
-    find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar")
-endif ()
-
+find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar")
 if (LLVM_AR_PATH)
     set (CMAKE_AR "${LLVM_AR_PATH}")
 endif ()
-
 message(STATUS "Using archiver: ${CMAKE_AR}")
 
 # Ranlib
-
-if (COMPILER_CLANG)
-    find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib")
-endif ()
-
+find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib")
 if (LLVM_RANLIB_PATH)
     set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}")
 endif ()
-
 message(STATUS "Using ranlib: ${CMAKE_RANLIB}")
 
 # Install Name Tool
-
-if (COMPILER_CLANG)
-    find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool")
-endif ()
-
+find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool")
 if (LLVM_INSTALL_NAME_TOOL_PATH)
     set (CMAKE_INSTALL_NAME_TOOL "${LLVM_INSTALL_NAME_TOOL_PATH}")
 endif ()
-
 message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}")
 
 # Objcopy
-
-if (COMPILER_CLANG)
-    find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy")
-endif ()
-
+find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy")
 if (OBJCOPY_PATH)
     message (STATUS "Using objcopy: ${OBJCOPY_PATH}")
 else ()
@@ -130,11 +99,7 @@ endif ()
 
 # Strip
-
-if (COMPILER_CLANG)
-    find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
-endif ()
-
+find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
 if (STRIP_PATH)
     message (STATUS "Using strip: ${STRIP_PATH}")
 else ()
diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake
index 455e4f09939..807d92d9077 100644
--- a/cmake/warnings.cmake
+++ b/cmake/warnings.cmake
@@ -15,37 +15,35 @@ if ((NOT CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") AND (NOT SANITIZE) AND (NOT CMAKE
 add_warning(frame-larger-than=65536)
 endif ()
 
-if (COMPILER_CLANG)
-    # Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
-    # We want to get everything out of the compiler for code quality.
-    add_warning(everything)
-    add_warning(pedantic)
-    no_warning(zero-length-array)
-    no_warning(c++98-compat-pedantic)
-    no_warning(c++98-compat)
-    no_warning(c++20-compat) # Use constinit in C++20 without warnings
-    no_warning(sign-conversion)
-    no_warning(implicit-int-conversion)
-    no_warning(implicit-int-float-conversion)
-    no_warning(ctad-maybe-unsupported) # clang 9+, linux-only
-    no_warning(disabled-macro-expansion)
-    no_warning(documentation-unknown-command)
-    no_warning(double-promotion)
-    no_warning(exit-time-destructors)
-    no_warning(float-equal)
-    no_warning(global-constructors)
-    no_warning(missing-prototypes)
-    no_warning(missing-variable-declarations)
-    no_warning(padded)
-    no_warning(switch-enum)
-    no_warning(undefined-func-template)
-    no_warning(unused-template)
-    no_warning(vla)
-    no_warning(weak-template-vtables)
-    no_warning(weak-vtables)
-    no_warning(thread-safety-negative) # experimental flag, too many false positives
-    no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
-    no_warning(unsafe-buffer-usage) # too aggressive
-    no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values"
-    # TODO Enable conversion, sign-conversion, double-promotion warnings.
-endif ()
+# Add some warnings that are not available even with -Wall -Wextra -Wpedantic.
+# We want to get everything out of the compiler for code quality.
+add_warning(everything)
+add_warning(pedantic)
+no_warning(zero-length-array)
+no_warning(c++98-compat-pedantic)
+no_warning(c++98-compat)
+no_warning(c++20-compat) # Use constinit in C++20 without warnings
+no_warning(sign-conversion)
+no_warning(implicit-int-conversion)
+no_warning(implicit-int-float-conversion)
+no_warning(ctad-maybe-unsupported) # clang 9+, linux-only
+no_warning(disabled-macro-expansion)
+no_warning(documentation-unknown-command)
+no_warning(double-promotion)
+no_warning(exit-time-destructors)
+no_warning(float-equal)
+no_warning(global-constructors)
+no_warning(missing-prototypes)
+no_warning(missing-variable-declarations)
+no_warning(padded)
+no_warning(switch-enum)
+no_warning(undefined-func-template)
+no_warning(unused-template)
+no_warning(vla)
+no_warning(weak-template-vtables)
+no_warning(weak-vtables)
+no_warning(thread-safety-negative) # experimental flag, too many false positives
+no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16
+no_warning(unsafe-buffer-usage) # too aggressive
+no_warning(switch-default) # conflicts with "defaults in a switch covering all enum values"
+# TODO Enable conversion, sign-conversion, double-promotion warnings.
diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt
index e76268592ee..c07e9e6925b 100644
--- a/contrib/capnproto-cmake/CMakeLists.txt
+++ b/contrib/capnproto-cmake/CMakeLists.txt
@@ -81,9 +81,7 @@ set (CAPNPC_SRCS
 add_library(_capnpc ${CAPNPC_SRCS})
 target_link_libraries(_capnpc PUBLIC _capnp)
 
-if (COMPILER_CLANG)
-    set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t)
-endif ()
+set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t)
 
 target_compile_options(_kj PRIVATE ${CAPNP_PRIVATE_CXX_FLAGS})
 target_compile_options(_capnp PRIVATE ${CAPNP_PRIVATE_CXX_FLAGS})
diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt
index 021c88bcb04..72846143b9e 100644
--- a/contrib/openssl-cmake/CMakeLists.txt
+++ b/contrib/openssl-cmake/CMakeLists.txt
@@ -91,12 +91,10 @@ set(LIB_SOVERSION ${VERSION_MAJOR})
 
 enable_language(ASM)
 
-if(COMPILER_CLANG)
-    add_definitions(-Wno-unused-command-line-argument)
-    # Note that s390x build uses mold linker
-    if(NOT ARCH_S390X)
-        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1
-    endif()
+add_definitions(-Wno-unused-command-line-argument)
+# Note that s390x build uses mold linker
+if(NOT ARCH_S390X)
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") # only relevant for -DENABLE_OPENSSL_DYNAMIC=1
 endif()
 
 if(ARCH_AMD64)
diff --git a/contrib/sentry-native-cmake/CMakeLists.txt b/contrib/sentry-native-cmake/CMakeLists.txt
index 6364e75db28..6e4c8c36081 100644
--- a/contrib/sentry-native-cmake/CMakeLists.txt
+++ b/contrib/sentry-native-cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (NOT OS_FREEBSD AND NOT (OS_DARWIN AND COMPILER_CLANG))
+if (NOT OS_FREEBSD AND NOT OS_DARWIN)
     option (ENABLE_SENTRY "Enable Sentry" ${ENABLE_LIBRARIES})
 else()
     option (ENABLE_SENTRY "Enable Sentry" OFF)

From 577dccd47ff70af55140b673a436354d289c1344 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 7 May 2024 21:09:19 +0200
Subject: [PATCH 53/54] Fix ProfileEventTimeIncrement code

Signed-off-by: Azat Khuzhin
---
 src/Common/ElapsedTimeProfileEventIncrement.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/Common/ElapsedTimeProfileEventIncrement.h b/src/Common/ElapsedTimeProfileEventIncrement.h
index 731295a4cfd..aa944beeaa9 100644
--- a/src/Common/ElapsedTimeProfileEventIncrement.h
+++ b/src/Common/ElapsedTimeProfileEventIncrement.h
@@ -17,19 +17,18 @@ enum Time
 template