From 174607c6bf20c800298595de82d24ccb0b894197 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Mon, 27 Nov 2023 15:10:07 +0100
Subject: [PATCH 001/105] Fixed potential exception due to stale profile UUID

`SettingsProfilesInfo::profiles` is not updated in the background when any of the profiles assigned to the user change, but `SettingsProfilesInfo::profiles_with_implicit` is.

Update of #42641
Kudos to @tavplubix: https://github.com/ClickHouse/ClickHouse/pull/42641/files/3d0c07ac5b8f18917f2314474030910176ec7940#r1406196201
---
 src/Access/SettingsProfilesInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp
index d8b52ecf5e4..ae72cd52f2c 100644
--- a/src/Access/SettingsProfilesInfo.cpp
+++ b/src/Access/SettingsProfilesInfo.cpp
@@ -66,7 +66,7 @@ Strings SettingsProfilesInfo::getProfileNames() const
 {
     Strings result;
     result.reserve(profiles.size());
-    for (const auto & profile_id : profiles)
+    for (const auto & profile_id : profiles_with_implicit)
     {
         const auto p = names_of_profiles.find(profile_id);
         if (p != names_of_profiles.end())

From 7411fcc907c65513dcb3895aa339007d3ac36aed Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 12:12:23 +0100
Subject: [PATCH 002/105] Filtering of profile UUIDs for SettingsProfilesInfo

---
 src/Access/SettingsProfilesCache.cpp | 12 ++++++++++--
 src/Access/SettingsProfilesCache.h   |  6 +++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index f03e68ba455..11cb3a79926 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -4,6 +4,8 @@
 #include <Access/SettingsProfilesInfo.h>
 #include <Common/quoteString.h>
 
+#include <boost/range/algorithm_ext/erase.hpp>
+
 
 namespace DB
 {
@@ -141,7 +143,7 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
     info->profiles = merged_settings.toProfileIDs();
-    substituteProfiles(merged_settings, info->profiles_with_implicit, info->names_of_profiles);
+    substituteProfiles(merged_settings, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
 
     info->settings = merged_settings.toSettingsChanges();
     info->constraints = merged_settings.toSettingsConstraints(access_control);
@@ -152,6 +154,7 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
 
 void SettingsProfilesCache::substituteProfiles(
     SettingsProfileElements & elements,
+    std::vector<UUID> & profiles,
     std::vector<UUID> & substituted_profiles,
     std::unordered_map<UUID, String> & names_of_substituted_profiles) const
 {
@@ -184,6 +187,11 @@ void SettingsProfilesCache::substituteProfiles(
         names_of_substituted_profiles.emplace(profile_id, profile->getName());
     }
     std::reverse(substituted_profiles.begin(), substituted_profiles.end());
+
+    boost::range::remove_erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
+    {
+        return !substituted_profiles_set.contains(profile_id);
+    });
 }
 
 std::shared_ptr<const EnabledSettings> SettingsProfilesCache::getEnabledSettings(
@@ -231,7 +239,7 @@ std::shared_ptr<const SettingsProfilesInfo> SettingsProfilesCache::getSettingsPr
 
     info->profiles.push_back(profile_id);
     info->profiles_with_implicit.push_back(profile_id);
-    substituteProfiles(elements, info->profiles_with_implicit, info->names_of_profiles);
+    substituteProfiles(elements, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
     info->settings = elements.toSettingsChanges();
     info->constraints.merge(elements.toSettingsConstraints(access_control));
 
diff --git a/src/Access/SettingsProfilesCache.h b/src/Access/SettingsProfilesCache.h
index 28914596ccc..afc3c3e13a5 100644
--- a/src/Access/SettingsProfilesCache.h
+++ b/src/Access/SettingsProfilesCache.h
@@ -37,7 +37,11 @@ private:
     void profileRemoved(const UUID & profile_id);
     void mergeSettingsAndConstraints();
     void mergeSettingsAndConstraintsFor(EnabledSettings & enabled) const;
-    void substituteProfiles(SettingsProfileElements & elements, std::vector<UUID> & substituted_profiles, std::unordered_map<UUID, String> & names_of_substituted_profiles) const;
+
+    void substituteProfiles(SettingsProfileElements & elements,
+        std::vector<UUID> & profiles,
+        std::vector<UUID> & substituted_profiles,
+        std::unordered_map<UUID, String> & names_of_substituted_profiles) const;
 
     const AccessControl & access_control;
     std::unordered_map<UUID, SettingsProfilePtr> all_profiles;

From 6aaf1565e1c3d52d2867e375f52950e2cdbaa504 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 12:14:01 +0100
Subject: [PATCH 003/105] Using profiles instead of profiles_with_implicit for
 getProfileNames()

---
 src/Access/SettingsProfilesInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp
index ae72cd52f2c..d8b52ecf5e4 100644
--- a/src/Access/SettingsProfilesInfo.cpp
+++ b/src/Access/SettingsProfilesInfo.cpp
@@ -66,7 +66,7 @@ Strings SettingsProfilesInfo::getProfileNames() const
 {
     Strings result;
     result.reserve(profiles.size());
-    for (const auto & profile_id : profiles_with_implicit)
+    for (const auto & profile_id : profiles)
     {
         const auto p = names_of_profiles.find(profile_id);
         if (p != names_of_profiles.end())

From b634e043d5e64073961bd9cd337f4dbef6615657 Mon Sep 17 00:00:00 2001
From: Vasily Nemkov <V.Nemkov@gmail.com>
Date: Tue, 28 Nov 2023 14:32:45 +0100
Subject: [PATCH 004/105] std::erase_if instead of
 boost::range::remove_erase_if

---
 src/Access/SettingsProfilesCache.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index 11cb3a79926..9f4fc5a5d89 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -4,8 +4,6 @@
 #include <Access/SettingsProfilesInfo.h>
 #include <Common/quoteString.h>
 
-#include <boost/range/algorithm_ext/erase.hpp>
-
 
 namespace DB
 {
@@ -188,7 +186,7 @@ void SettingsProfilesCache::substituteProfiles(
     }
     std::reverse(substituted_profiles.begin(), substituted_profiles.end());
 
-    boost::range::remove_erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
+    std::erase_if(profiles, [&substituted_profiles_set](const UUID & profile_id)
     {
         return !substituted_profiles_set.contains(profile_id);
     });

From 995b51ef736c20d9490f75cebaa4f42291df40cf Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Thu, 7 Dec 2023 03:41:32 +0000
Subject: [PATCH 005/105] Allow avoiding resolving hostnames in DDLWorker

---
 programs/server/config.xml                    |  3 +
 src/Interpreters/DDLTask.cpp                  | 18 ++++-
 src/Interpreters/DDLTask.h                    |  2 +-
 src/Interpreters/DDLWorker.cpp                |  5 +-
 src/Interpreters/DDLWorker.h                  |  2 +
 .../test_ddl_config_hostname/__init__.py      |  0
 .../configs/remote_servers.xml                | 19 +++++
 .../test_ddl_config_hostname/test.py          | 80 +++++++++++++++++++
 8 files changed, 125 insertions(+), 4 deletions(-)
 create mode 100644 tests/integration/test_ddl_config_hostname/__init__.py
 create mode 100644 tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
 create mode 100644 tests/integration/test_ddl_config_hostname/test.py

diff --git a/programs/server/config.xml b/programs/server/config.xml
index e333082d099..688f0bf5645 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1380,6 +1380,9 @@
 
         <!-- Controls how many tasks could be in the queue -->
         <!-- <max_tasks_in_queue>1000</max_tasks_in_queue> -->
+
+        <!-- Host name of the current node. If specified, will only compare and not resolve hostnames inside the DDL tasks -->
+        <!-- <host_name>replica</host_name> -->
     </distributed_ddl>
 
     <!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 6e9155ab2a2..172d68f2941 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -215,14 +215,28 @@ ContextMutablePtr DDLTaskBase::makeQueryContext(ContextPtr from_context, const Z
 }
 
 
-bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper)
+bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional<std::string> & config_host_name)
 {
     bool host_in_hostlist = false;
     std::exception_ptr first_exception = nullptr;
 
+    auto maybe_secure_port = global_context->getTCPPortSecure();
+
     for (const HostID & host : entry.hosts)
     {
-        auto maybe_secure_port = global_context->getTCPPortSecure();
+        if (config_host_name)
+        {
+            if (host.host_name != *config_host_name)
+                continue;
+
+            if (!(maybe_secure_port && maybe_secure_port == host.port) && !(global_context->getTCPPort() == host.port))
+                continue;
+
+            host_in_hostlist = true;
+            host_id = host;
+            host_id_str = host.toString();
+            break;
+        }
 
         try
         {
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index 1ceb74c7048..e1a81ac97af 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -143,7 +143,7 @@ struct DDLTask : public DDLTaskBase
 {
     DDLTask(const String & name, const String & path) : DDLTaskBase(name, path) {}
 
-    bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper);
+    bool findCurrentHostID(ContextPtr global_context, Poco::Logger * log, const ZooKeeperPtr & zookeeper, const std::optional<std::string> & config_host_name);
 
     void setClusterInfo(ContextPtr context, Poco::Logger * log);
 
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 30cf6fd0568..de24dea1857 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -107,6 +107,9 @@ DDLWorker::DDLWorker(
         cleanup_delay_period = config->getUInt64(prefix + ".cleanup_delay_period", static_cast<UInt64>(cleanup_delay_period));
         max_tasks_in_queue = std::max<UInt64>(1, config->getUInt64(prefix + ".max_tasks_in_queue", max_tasks_in_queue));
 
+        if (config->has(prefix + ".host_name"))
+            config_host_name = config->getString(prefix + ".host_name");
+
         if (config->has(prefix + ".profile"))
             context->setSetting("profile", config->getString(prefix + ".profile"));
     }
@@ -214,7 +217,7 @@ DDLTaskPtr DDLWorker::initAndCheckTask(const String & entry_name, String & out_r
     /// Stage 2: resolve host_id and check if we should execute query or not
     /// Multiple clusters can use single DDL queue path in ZooKeeper,
     /// So we should skip task if we cannot find current host in cluster hosts list.
-    if (!task->findCurrentHostID(context, log, zookeeper))
+    if (!task->findCurrentHostID(context, log, zookeeper, config_host_name))
     {
         out_reason = "There is no a local address in host list";
         return add_to_skip_set();
diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h
index d34a4135199..adc9a491d81 100644
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@@ -153,6 +153,8 @@ protected:
     ContextMutablePtr context;
     Poco::Logger * log;
 
+    std::optional<std::string> config_host_name; /// host_name from config
+
     std::string host_fqdn;      /// current host domain name
     std::string host_fqdn_id;   /// host_name:port
     std::string queue_dir;      /// dir with queue of queries
diff --git a/tests/integration/test_ddl_config_hostname/__init__.py b/tests/integration/test_ddl_config_hostname/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml b/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
new file mode 100644
index 00000000000..8c6a507951d
--- /dev/null
+++ b/tests/integration/test_ddl_config_hostname/configs/remote_servers.xml
@@ -0,0 +1,19 @@
+<clickhouse>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <internal_replication>true</internal_replication>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+
+    <allow_zookeeper_write>1</allow_zookeeper_write>
+
+    <distributed_ddl>
+        <host_name>node1</host_name>
+    </distributed_ddl>
+</clickhouse>
diff --git a/tests/integration/test_ddl_config_hostname/test.py b/tests/integration/test_ddl_config_hostname/test.py
new file mode 100644
index 00000000000..f6cb5f5c38e
--- /dev/null
+++ b/tests/integration/test_ddl_config_hostname/test.py
@@ -0,0 +1,80 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance(
+    "node1",
+    main_configs=["configs/remote_servers.xml"],
+    with_zookeeper=True,
+    stay_alive=True,
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_ddl_queue_delete_add_replica(started_cluster):
+    #  Some query started on the cluster, then we deleted some unfinished node
+    #  and added a new node to the cluster. Considering that there are less
+    #  finished nodes than expected and we can't resolve deleted node's hostname
+    #  the queue will be stuck on a new node.
+    #  <host_name> inside <distributed_ddl> allows us to simply discard deleted
+    #  node's hostname by simple comparison without trying to resolve it.
+
+    node1.query(
+        "create table hostname_change on cluster test_cluster (n int) engine=Log"
+    )
+
+    # There's no easy way to change hostname of a container, so let's update values in zk
+    query_znode = node1.query(
+        "select max(name) from system.zookeeper where path='/clickhouse/task_queue/ddl'"
+    )[:-1]
+
+    value = (
+        node1.query(
+            "select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{}' format TSVRaw".format(
+                query_znode
+            )
+        )[:-1]
+        .replace("hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']")
+        .replace("initiator: node1:9000", "initiator: finished_node:9000")
+        .replace("\\'", "#")
+        .replace("'", "\\'")
+        .replace("\n", "\\n")
+        .replace("#", "\\'")
+    )
+
+    finished_znode = node1.query(
+        "select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{}/finished' and name like '%node1%'".format(
+            query_znode
+        )
+    )[:-1]
+
+    node1.query(
+        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl', '{}')".format(
+            query_znode, value
+        )
+    )
+    started_cluster.get_kazoo_client("zoo1").delete(
+        "/clickhouse/task_queue/ddl/{}/finished/{}".format(query_znode, finished_znode)
+    )
+
+    node1.query(
+        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl/{}/finished', '0\\n')".format(
+            finished_znode.replace("node1", "finished_node"), query_znode
+        )
+    )
+
+    node1.restart_clickhouse(kill=True)
+
+    node1.query(
+        "create table hostname_change2 on cluster test_cluster (n int) engine=Log"
+    )

From 10bfd054d82bd70d0a931ad71c6031a54306740c Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Fri, 8 Dec 2023 19:40:09 +0000
Subject: [PATCH 006/105] Fix style & review

---
 src/Interpreters/DDLTask.cpp                  |  4 ++--
 .../test_ddl_config_hostname/test.py          | 24 ++++++++-----------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 172d68f2941..0164f5668a2 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -226,10 +226,10 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     {
         if (config_host_name)
         {
-            if (host.host_name != *config_host_name)
+            if (config_host_name != host.host_name)
                 continue;
 
-            if (!(maybe_secure_port && maybe_secure_port == host.port) && !(global_context->getTCPPort() == host.port))
+            if (maybe_secure_port != host.port && global_context->getTCPPort() != host.port)
                 continue;
 
             host_in_hostlist = true;
diff --git a/tests/integration/test_ddl_config_hostname/test.py b/tests/integration/test_ddl_config_hostname/test.py
index f6cb5f5c38e..724e766c9dc 100644
--- a/tests/integration/test_ddl_config_hostname/test.py
+++ b/tests/integration/test_ddl_config_hostname/test.py
@@ -40,11 +40,11 @@ def test_ddl_queue_delete_add_replica(started_cluster):
 
     value = (
         node1.query(
-            "select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{}' format TSVRaw".format(
-                query_znode
-            )
+            f"select value from system.zookeeper where path='/clickhouse/task_queue/ddl' and name='{query_znode}' format TSVRaw"
         )[:-1]
-        .replace("hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']")
+        .replace(
+            "hosts: ['node1:9000']", "hosts: ['finished_node:9000','deleted_node:9000']"
+        )
         .replace("initiator: node1:9000", "initiator: finished_node:9000")
         .replace("\\'", "#")
         .replace("'", "\\'")
@@ -53,24 +53,20 @@ def test_ddl_queue_delete_add_replica(started_cluster):
     )
 
     finished_znode = node1.query(
-        "select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{}/finished' and name like '%node1%'".format(
-            query_znode
-        )
+        f"select name from system.zookeeper where path='/clickhouse/task_queue/ddl/{query_znode}/finished' and name like '%node1%'"
     )[:-1]
 
     node1.query(
-        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl', '{}')".format(
-            query_znode, value
-        )
+        f"insert into system.zookeeper (name, path, value) values ('{query_znode}', '/clickhouse/task_queue/ddl', '{value}')"
     )
     started_cluster.get_kazoo_client("zoo1").delete(
-        "/clickhouse/task_queue/ddl/{}/finished/{}".format(query_znode, finished_znode)
+        f"/clickhouse/task_queue/ddl/{query_znode}/finished/{finished_znode}"
     )
 
+    finished_znode = finished_znode.replace("node1", "finished_node")
+
     node1.query(
-        "insert into system.zookeeper (name, path, value) values ('{}', '/clickhouse/task_queue/ddl/{}/finished', '0\\n')".format(
-            finished_znode.replace("node1", "finished_node"), query_znode
-        )
+        f"insert into system.zookeeper (name, path, value) values ('{finished_znode}', '/clickhouse/task_queue/ddl/{query_znode}/finished', '0\\n')"
     )
 
     node1.restart_clickhouse(kill=True)

From 18e29bc6a20b9fc63160385f8d967202c5b44eda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 10:46:30 +0800
Subject: [PATCH 007/105] BloomFilter support match function

---
 src/Common/OptimizedRegularExpression.h       |  1 +
 .../MergeTree/MergeTreeIndexFullText.cpp      | 99 +++++++++++++++++++
 .../MergeTree/MergeTreeIndexFullText.h        |  2 +
 3 files changed, 102 insertions(+)

diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h
index 4521b81dfe2..a4418df698a 100644
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@@ -106,6 +106,7 @@ public:
         bool & required_substring_is_prefix,
         std::vector<std::string> & alternatives);
 
+
 private:
     bool is_trivial;
     bool required_substring_is_prefix;
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 7dbe7a0cbe4..bf954fd1e46 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -17,6 +17,7 @@
 #include <Parsers/ASTSubquery.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Core/Defines.h>
+#include <Common/OptimizedRegularExpression.h>
 
 #include <Poco/Logger.h>
 
@@ -201,6 +202,8 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
              || element.function == RPNElement::FUNCTION_IN
              || element.function == RPNElement::FUNCTION_NOT_IN
              || element.function == RPNElement::FUNCTION_MULTI_SEARCH
+             || element.function == RPNElement::FUNCTION_MATCH
+             || element.function == RPNElement::FUNCTION_MULTI_MATCH
              || element.function == RPNElement::ALWAYS_FALSE)
         {
             rpn_stack.push_back(false);
@@ -233,6 +236,7 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
 /// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart
 bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
 {
+    std::cout<<"======== Flag into mayBeTrueOnGranule"<<std::endl;
     std::shared_ptr<MergeTreeIndexGranuleFullText> granule
             = std::dynamic_pointer_cast<MergeTreeIndexGranuleFullText>(idx_granule);
     if (!granule)
@@ -286,6 +290,41 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
             rpn_stack.emplace_back(
                     std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
         }
+        else if (element.function == RPNElement::FUNCTION_MATCH)
+        {
+            // If bloom filter is not null means we got required substring
+
+            if (!element.set_bloom_filters.empty())
+            {
+
+                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+                const auto & bloom_filters = element.set_bloom_filters[0];
+
+                for (size_t row = 0; row < bloom_filters.size(); ++row)
+                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+                if (element.bloom_filter)
+                {
+                    //auto required = rpn_stack.back();
+                    //rpn_stack.pop_back();
+                    //auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
+                    //rpn_stack.emplace_back(required.can_be_true && alternative, true);
+                    auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
+                    rpn_stack.emplace_back(alternative, true);
+                }
+                else
+                    rpn_stack.emplace_back(
+                            std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+            }
+            //TODO: Need to check why bloom_filter is not null while set_bloom_filters is not empty
+            else if (element.bloom_filter)
+            {
+                std::cout<<"=========== Bloom Filter is not null"<<std::endl;
+                rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
+            }
+
+        }
         else if (element.function == RPNElement::FUNCTION_NOT)
         {
             rpn_stack.back() = !rpn_stack.back();
@@ -390,6 +429,8 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
                  function_name == "notEquals" ||
                  function_name == "has" ||
                  function_name == "mapContains" ||
+                 function_name == "match" ||
+                 function_name == "multiMatchAny" ||
                  function_name == "like" ||
                  function_name == "notLike" ||
                  function_name.starts_with("hasToken") ||
@@ -510,6 +551,64 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
         return true;
     }
+
+    if (function_name == "match")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_MATCH;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+
+        auto & string_view = const_value.get<String>();
+        String required_substring;
+        bool is_trivial;
+        bool required_substring_is_prefix;
+        std::vector<String> alternatives;
+        OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
+        std::cout<<"========= is trivial:"<<is_trivial<<std::endl;
+        std::cout<<"========= required_substring_is_prefix:"<<required_substring_is_prefix<<std::endl;
+        std::cout<<"========= regex string is:"<<string_view<<std::endl;
+        std::cout<<"========= required sub string is:"<<required_substring<<std::endl;
+        for (const auto & alternative : alternatives)
+            std::cout<<"========= alternative string:"<<alternative<<std::endl;
+
+        if (required_substring.empty() && alternatives.empty())
+            return false;
+
+        if (!alternatives.empty())
+        {
+            std::vector<std::vector<BloomFilter>> bloom_filters;
+            bloom_filters.emplace_back();
+            for (const auto & alternative : alternatives)
+            {
+                bloom_filters.back().emplace_back(params);
+                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
+            }
+            out.set_bloom_filters = std::move(bloom_filters);
+        }
+        else if (!required_substring.empty())
+           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
+
+        return true;
+    }
+
+    if (function_name == "notEquals")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_NOT_EQUALS;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+        const auto & value = const_value.get<String>();
+        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
+        return true;
+    }
+    else if (function_name == "equals")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_EQUALS;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+        const auto & value = const_value.get<String>();
+        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
+        return true;
+    }
     else if (function_name == "has")
     {
         out.key_column = *key_index;
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h
index fbfa0fd27fc..85c873f42ba 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.h
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h
@@ -90,8 +90,10 @@ private:
             FUNCTION_NOT_EQUALS,
             FUNCTION_HAS,
             FUNCTION_IN,
+            FUNCTION_MATCH,
             FUNCTION_NOT_IN,
             FUNCTION_MULTI_SEARCH,
+            FUNCTION_MULTI_MATCH,
             FUNCTION_UNKNOWN, /// Can take any value.
             /// Operators of the logical expression.
             FUNCTION_NOT,

From e36cd6a06e303976b9c2705788d5070151d17233 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 10:56:52 +0800
Subject: [PATCH 008/105] BloomFilter support match function

---
 src/Storages/MergeTree/MergeTreeIndexFullText.cpp | 6 ------
 src/Storages/MergeTree/MergeTreeIndexFullText.h   | 1 -
 2 files changed, 7 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index bf954fd1e46..6e8c517e883 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -203,7 +203,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
              || element.function == RPNElement::FUNCTION_NOT_IN
              || element.function == RPNElement::FUNCTION_MULTI_SEARCH
              || element.function == RPNElement::FUNCTION_MATCH
-             || element.function == RPNElement::FUNCTION_MULTI_MATCH
              || element.function == RPNElement::ALWAYS_FALSE)
         {
             rpn_stack.push_back(false);
@@ -236,7 +235,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
 /// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart
 bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
 {
-    std::cout<<"======== Flag into mayBeTrueOnGranule"<<std::endl;
     std::shared_ptr<MergeTreeIndexGranuleFullText> granule
             = std::dynamic_pointer_cast<MergeTreeIndexGranuleFullText>(idx_granule);
     if (!granule)
@@ -564,10 +562,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         bool required_substring_is_prefix;
         std::vector<String> alternatives;
         OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
-        std::cout<<"========= is trivial:"<<is_trivial<<std::endl;
-        std::cout<<"========= required_substring_is_prefix:"<<required_substring_is_prefix<<std::endl;
-        std::cout<<"========= regex string is:"<<string_view<<std::endl;
-        std::cout<<"========= required sub string is:"<<required_substring<<std::endl;
         for (const auto & alternative : alternatives)
             std::cout<<"========= alternative string:"<<alternative<<std::endl;
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h
index 85c873f42ba..c35b2f2f3c4 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.h
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h
@@ -93,7 +93,6 @@ private:
             FUNCTION_MATCH,
             FUNCTION_NOT_IN,
             FUNCTION_MULTI_SEARCH,
-            FUNCTION_MULTI_MATCH,
             FUNCTION_UNKNOWN, /// Can take any value.
             /// Operators of the logical expression.
             FUNCTION_NOT,

From ebc570aef5dcb470343ced071d4dccdc5f38f175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:31:07 +0800
Subject: [PATCH 009/105] optimize code

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 50 ++++++-------------
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 6e8c517e883..fd577ed93db 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -242,6 +242,18 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
+    auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
+    {
+        std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+        const auto & bloom_filters = element.set_bloom_filters[0];
+
+        for (size_t row = 0; row < bloom_filters.size(); ++row)
+            result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+        rpn_stack.emplace_back(
+                std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+    };
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -278,50 +290,19 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         }
         else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH)
         {
-            std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-            const auto & bloom_filters = element.set_bloom_filters[0];
-
-            for (size_t row = 0; row < bloom_filters.size(); ++row)
-                result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-            rpn_stack.emplace_back(
-                    std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+            multi_funtion_processor(element);
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
-            // If bloom filter is not null means we got required substring
-
+            // If set_bloom_filters is not empty means we got alternative substring
             if (!element.set_bloom_filters.empty())
             {
-
-                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-                const auto & bloom_filters = element.set_bloom_filters[0];
-
-                for (size_t row = 0; row < bloom_filters.size(); ++row)
-                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-                if (element.bloom_filter)
-                {
-                    //auto required = rpn_stack.back();
-                    //rpn_stack.pop_back();
-                    //auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
-                    //rpn_stack.emplace_back(required.can_be_true && alternative, true);
-                    auto alternative = std::find(std::cbegin(result), std::cend(result), true) != std::end(result);
-                    rpn_stack.emplace_back(alternative, true);
-                }
-                else
-                    rpn_stack.emplace_back(
-                            std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+                multi_funtion_processor(element);
             }
-            //TODO: Need to check why bloom_filter is not null while set_bloom_filters is not empty
             else if (element.bloom_filter)
             {
-                std::cout<<"=========== Bloom Filter is not null"<<std::endl;
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
             }
-
         }
         else if (element.function == RPNElement::FUNCTION_NOT)
         {
@@ -428,7 +409,6 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
                  function_name == "has" ||
                  function_name == "mapContains" ||
                  function_name == "match" ||
-                 function_name == "multiMatchAny" ||
                  function_name == "like" ||
                  function_name == "notLike" ||
                  function_name.starts_with("hasToken") ||

From 57a5bef09e1354baff359f16362c5251ecc4364c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:49:56 +0800
Subject: [PATCH 010/105] optimize code

---
 .../MergeTree/MergeTreeIndexFullText.cpp       | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index fd577ed93db..23b95ed2c7d 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -565,24 +565,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         return true;
     }
 
-    if (function_name == "notEquals")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_NOT_EQUALS;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-        const auto & value = const_value.get<String>();
-        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
-        return true;
-    }
-    else if (function_name == "equals")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_EQUALS;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-        const auto & value = const_value.get<String>();
-        token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
-        return true;
-    }
     else if (function_name == "has")
     {
         out.key_column = *key_index;

From 3bd7505a836a2181bc0aa92cb344a9c34f581c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Fri, 15 Dec 2023 11:50:56 +0800
Subject: [PATCH 011/105] optimize code

---
 src/Common/OptimizedRegularExpression.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h
index a4418df698a..4521b81dfe2 100644
--- a/src/Common/OptimizedRegularExpression.h
+++ b/src/Common/OptimizedRegularExpression.h
@@ -106,7 +106,6 @@ public:
         bool & required_substring_is_prefix,
         std::vector<std::string> & alternatives);
 
-
 private:
     bool is_trivial;
     bool required_substring_is_prefix;

From 93b18a32c3948d0d48bf724bc9745f530c83970a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Mon, 18 Dec 2023 16:02:19 +0800
Subject: [PATCH 012/105] add tests

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 13 +++--
 .../02943_tokenbf_support_match.reference     | 51 +++++++++++++++++++
 .../02943_tokenbf_support_match.sql           | 43 ++++++++++++++++
 3 files changed, 100 insertions(+), 7 deletions(-)
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.reference
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.sql

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 23b95ed2c7d..2a206f69024 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -242,6 +242,7 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
+
     auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
     {
         std::vector<bool> result(element.set_bloom_filters.back().size(), true);
@@ -254,6 +255,7 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         rpn_stack.emplace_back(
                 std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
     };
+
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -294,11 +296,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
-            // If set_bloom_filters is not empty means we got alternative substring
             if (!element.set_bloom_filters.empty())
             {
                 multi_funtion_processor(element);
             }
+            // If set_bloom_filters is not empty means we got alternative substring
             else if (element.bloom_filter)
             {
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
@@ -538,12 +540,9 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
 
         auto & string_view = const_value.get<String>();
         String required_substring;
-        bool is_trivial;
-        bool required_substring_is_prefix;
         std::vector<String> alternatives;
-        OptimizedRegularExpression::analyze(string_view, required_substring, is_trivial, required_substring_is_prefix, alternatives);
-        for (const auto & alternative : alternatives)
-            std::cout<<"========= alternative string:"<<alternative<<std::endl;
+        bool tmp_var;
+        OptimizedRegularExpression::analyze(string_view, required_substring, tmp_var, tmp_var, alternatives);
 
         if (required_substring.empty() && alternatives.empty())
             return false;
@@ -559,7 +558,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
             }
             out.set_bloom_filters = std::move(bloom_filters);
         }
-        else if (!required_substring.empty())
+        else
            token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
 
         return true;
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
new file mode 100644
index 00000000000..7e36857190a
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -0,0 +1,51 @@
+============== SKIP 3 GRANUS ============
+============== Required String: Hello ============
+============== Alternative String: Hello ClickHouse ============
+============== Required String: Hello World ============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 2/5
+
+
+============== SKIP 3 GRANUS ============
+============== No Required String ============
+============== Alternative String: ClickHouse ============
+============== Alternative String: World ============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 2/5
+
+
+============== SKIP 4 GRANUS ============
+============== Required String: OLAP============
+============== No Alternative String============
+Expression ((Projection + Before ORDER BY))
+  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 5/5
+    Skip
+      Name: str_idx
+      Description: tokenbf_v1 GRANULARITY 1
+      Parts: 1/1
+      Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
new file mode 100644
index 00000000000..078e32ae94c
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -0,0 +1,43 @@
+DROP DATABASE IF EXISTS test_tokenbf_match;
+
+CREATE DATABASE test_tokenbf_match;
+
+CREATE TABLE test_tokenbf_match.test_tokenbf 
+(
+    `id` UInt32,
+    `str` String,
+    INDEX str_idx str TYPE tokenbf_v1(256, 2, 0) GRANULARITY 1
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+ 
+INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
+
+SELECT '============== SKIP 3 GRANUS ============';
+SELECT '============== Required String: Hello ============';
+SELECT '============== Alternative String: Hello ClickHouse ============';
+SELECT '============== Required String: Hello World ============';
+
+EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
+
+SELECT '';
+SELECT '';
+
+SELECT '============== SKIP 3 GRANUS ============';
+SELECT '============== No Required String ============';
+SELECT '============== Alternative String: ClickHouse ============';
+SELECT '============== Alternative String: World ============';
+
+EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
+
+SELECT '';
+SELECT '';
+
+SELECT '============== SKIP 4 GRANUS ============';
+SELECT '============== Required String: OLAP============';
+SELECT '============== No Alternative String============';
+
+EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
+
+DROP DATABASE IF EXISTS test_tokenbf_match;

From 761554e86d04ca3d7984037cfba72a34e00b1498 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Mon, 18 Dec 2023 18:08:41 +0800
Subject: [PATCH 013/105] fix test

---
 .../02943_tokenbf_support_match.reference     | 28 ++++++++++-------
 .../02943_tokenbf_support_match.sql           | 30 ++++++++++++-------
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
index 7e36857190a..241346c13d7 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -1,7 +1,9 @@
-============== SKIP 3 GRANUS ============
-============== Required String: Hello ============
-============== Alternative String: Hello ClickHouse ============
-============== Required String: Hello World ============
+========================================
+| SKIP 3 GRANUS                        |
+| Required String: Hello               |
+| Alternative String: Hello ClickHouse |
+| Alternative String: Hello World      |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
@@ -16,10 +18,12 @@ Expression ((Projection + Before ORDER BY))
       Granules: 2/5
 
 
-============== SKIP 3 GRANUS ============
-============== No Required String ============
-============== Alternative String: ClickHouse ============
-============== Alternative String: World ============
+========================================
+| SKIP 3 GRANUS                        |
+| No Required String                   |
+| Alternative String: ClickHouse       |
+| Alternative String: World            |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
@@ -34,9 +38,11 @@ Expression ((Projection + Before ORDER BY))
       Granules: 2/5
 
 
-============== SKIP 4 GRANUS ============
-============== Required String: OLAP============
-============== No Alternative String============
+========================================
+| SKIP 4 GRANUS                        |
+| Required String: OLAP                |
+| No Alternative String                |
+========================================
 Expression ((Projection + Before ORDER BY))
   ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
   Indexes:
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
index 078e32ae94c..2a98151624c 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -1,3 +1,5 @@
+-- Tags: no-parallel
+
 DROP DATABASE IF EXISTS test_tokenbf_match;
 
 CREATE DATABASE test_tokenbf_match;
@@ -14,29 +16,35 @@ SETTINGS index_granularity = 1;
  
 INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
 
-SELECT '============== SKIP 3 GRANUS ============';
-SELECT '============== Required String: Hello ============';
-SELECT '============== Alternative String: Hello ClickHouse ============';
-SELECT '============== Required String: Hello World ============';
+SELECT '========================================';
+SELECT '| SKIP 3 GRANUS                        |';
+SELECT '| Required String: Hello               |';
+SELECT '| Alternative String: Hello ClickHouse |';
+SELECT '| Alternative String: Hello World      |';
+SELECT '========================================';
 
 EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '============== SKIP 3 GRANUS ============';
-SELECT '============== No Required String ============';
-SELECT '============== Alternative String: ClickHouse ============';
-SELECT '============== Alternative String: World ============';
+SELECT '========================================';
+SELECT '| SKIP 3 GRANUS                        |';
+SELECT '| No Required String                   |';
+SELECT '| Alternative String: ClickHouse       |';
+SELECT '| Alternative String: World            |';
+SELECT '========================================';
 
 EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '============== SKIP 4 GRANUS ============';
-SELECT '============== Required String: OLAP============';
-SELECT '============== No Alternative String============';
+SELECT '========================================';
+SELECT '| SKIP 4 GRANUS                        |';
+SELECT '| Required String: OLAP                |';
+SELECT '| No Alternative String                |';
+SELECT '========================================';
 
 EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
 

From 83d4b729615bc09b5019c164790353e9425891eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 19 Dec 2023 15:01:21 +0800
Subject: [PATCH 014/105] fix test

---
 .../02943_tokenbf_support_match.reference     | 47 ---------------
 .../02943_tokenbf_support_match.sql           | 59 ++++++++++++-------
 2 files changed, 38 insertions(+), 68 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
index 241346c13d7..d02011eb2a1 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
@@ -1,57 +1,10 @@
-========================================
-| SKIP 3 GRANUS                        |
-| Required String: Hello               |
-| Alternative String: Hello ClickHouse |
-| Alternative String: Hello World      |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 2/5
 
 
-========================================
-| SKIP 3 GRANUS                        |
-| No Required String                   |
-| Alternative String: ClickHouse       |
-| Alternative String: World            |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 2/5
 
 
-========================================
-| SKIP 4 GRANUS                        |
-| Required String: OLAP                |
-| No Alternative String                |
-========================================
-Expression ((Projection + Before ORDER BY))
-  ReadFromMergeTree (test_tokenbf_match.test_tokenbf)
-  Indexes:
-    PrimaryKey
-      Condition: true
-      Parts: 1/1
       Granules: 5/5
-    Skip
-      Name: str_idx
-      Description: tokenbf_v1 GRANULARITY 1
-      Parts: 1/1
       Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
index 2a98151624c..b48eb45c0d0 100644
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
@@ -16,36 +16,53 @@ SETTINGS index_granularity = 1;
  
 INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
 
-SELECT '========================================';
-SELECT '| SKIP 3 GRANUS                        |';
-SELECT '| Required String: Hello               |';
-SELECT '| Alternative String: Hello ClickHouse |';
-SELECT '| Alternative String: Hello World      |';
-SELECT '========================================';
+--SKIP 3 GRANUS
+--Required String: Hello
+--Alternative String: Hello ClickHouse
+--Alternative String: Hello World
+SELECT 
+  *
+FROM
+(
+    EXPLAIN indexes=1
+    SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)')
+)
+WHERE
+  explain like '%Granules%';
 
-EXPLAIN indexes=1 SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)');
 
 SELECT '';
 SELECT '';
 
-SELECT '========================================';
-SELECT '| SKIP 3 GRANUS                        |';
-SELECT '| No Required String                   |';
-SELECT '| Alternative String: ClickHouse       |';
-SELECT '| Alternative String: World            |';
-SELECT '========================================';
 
-EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)');
+--SKIP 3 GRANUS
+--No Required String
+--Alternative String: ClickHouse
+--Alternative String: World
+SELECT
+  *
+FROM
+(
+    EXPLAIN indexes = 1
+    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)')
+)
+WHERE
+  explain like '%Granules%';
 
 SELECT '';
 SELECT '';
 
-SELECT '========================================';
-SELECT '| SKIP 4 GRANUS                        |';
-SELECT '| Required String: OLAP                |';
-SELECT '| No Alternative String                |';
-SELECT '========================================';
-
-EXPLAIN indexes = 1 SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*');
+--SKIP 4 GRANUS
+--Required String: OLAP
+--No Alternative String
+SELECT
+  *
+FROM
+(
+    EXPLAIN indexes = 1
+    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*')
+)
+WHERE
+  explain like '%Granules%';
 
 DROP DATABASE IF EXISTS test_tokenbf_match;

From 6df2548417c46023aff87339f53691501380b48a Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 19 Dec 2023 09:11:18 +0000
Subject: [PATCH 015/105] Some minor adjustments

---
 .../MergeTree/MergeTreeIndexFullText.cpp      | 116 +++++++++---------
 ...f_indexes_support_match_function.reference |  26 ++++
 ...ngrambf_indexes_support_match_function.sql | 107 ++++++++++++++++
 .../02943_tokenbf_support_match.reference     |  10 --
 .../02943_tokenbf_support_match.sql           |  68 ----------
 5 files changed, 192 insertions(+), 135 deletions(-)
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
 create mode 100644 tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
 delete mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.reference
 delete mode 100644 tests/queries/0_stateless/02943_tokenbf_support_match.sql

diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
index 85343aabd50..3dbc4e8a7f1 100644
--- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp
@@ -1,23 +1,23 @@
 #include <Storages/MergeTree/MergeTreeIndexFullText.h>
 
 #include <Columns/ColumnArray.h>
-#include <DataTypes/DataTypesNumber.h>
+#include <Common/OptimizedRegularExpression.h>
+#include <Core/Defines.h>
 #include <DataTypes/DataTypeArray.h>
-#include <IO/WriteHelpers.h>
+#include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/misc.h>
-#include <Storages/MergeTree/MergeTreeData.h>
-#include <Storages/MergeTree/RPNBuilder.h>
-#include <Storages/MergeTree/MergeTreeIndexUtils.h>
 #include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTSubquery.h>
 #include <Parsers/ASTSelectQuery.h>
-#include <Core/Defines.h>
-#include <Common/OptimizedRegularExpression.h>
+#include <Parsers/ASTSubquery.h>
+#include <Storages/MergeTree/MergeTreeData.h>
+#include <Storages/MergeTree/MergeTreeIndexUtils.h>
+#include <Storages/MergeTree/RPNBuilder.h>
 
 #include <Poco/Logger.h>
 
@@ -243,20 +243,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
 
     /// Check like in KeyCondition.
     std::vector<BoolMask> rpn_stack;
-
-    auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
-    {
-        std::vector<bool> result(element.set_bloom_filters.back().size(), true);
-
-        const auto & bloom_filters = element.set_bloom_filters[0];
-
-        for (size_t row = 0; row < bloom_filters.size(); ++row)
-            result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
-
-        rpn_stack.emplace_back(
-                std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
-    };
-
     for (const auto & element : rpn)
     {
         if (element.function == RPNElement::FUNCTION_UNKNOWN)
@@ -294,17 +280,32 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
         else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH
             || element.function == RPNElement::FUNCTION_HAS_ANY)
         {
-            multi_funtion_processor(element);
+            std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+            const auto & bloom_filters = element.set_bloom_filters[0];
+
+            for (size_t row = 0; row < bloom_filters.size(); ++row)
+                result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+            rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
         }
         else if (element.function == RPNElement::FUNCTION_MATCH)
         {
             if (!element.set_bloom_filters.empty())
             {
-                multi_funtion_processor(element);
+                /// Alternative substrings
+                std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+                const auto & bloom_filters = element.set_bloom_filters[0];
+
+                for (size_t row = 0; row < bloom_filters.size(); ++row)
+                    result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+                rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
             }
-            // If set_bloom_filters is not empty means we got alternative substring
             else if (element.bloom_filter)
             {
+                /// Required substrings
                 rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
             }
         }
@@ -535,38 +536,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         return true;
     }
 
-    if (function_name == "match")
-    {
-        out.key_column = *key_index;
-        out.function = RPNElement::FUNCTION_MATCH;
-        out.bloom_filter = std::make_unique<BloomFilter>(params);
-
-        auto & string_view = const_value.get<String>();
-        String required_substring;
-        std::vector<String> alternatives;
-        bool tmp_var;
-        OptimizedRegularExpression::analyze(string_view, required_substring, tmp_var, tmp_var, alternatives);
-
-        if (required_substring.empty() && alternatives.empty())
-            return false;
-
-        if (!alternatives.empty())
-        {
-            std::vector<std::vector<BloomFilter>> bloom_filters;
-            bloom_filters.emplace_back();
-            for (const auto & alternative : alternatives)
-            {
-                bloom_filters.back().emplace_back(params);
-                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
-            }
-            out.set_bloom_filters = std::move(bloom_filters);
-        }
-        else
-           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
-
-        return true;
-    }
-
     else if (function_name == "has")
     {
         out.key_column = *key_index;
@@ -654,6 +623,39 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
         out.set_bloom_filters = std::move(bloom_filters);
         return true;
     }
+    else if (function_name == "match")
+    {
+        out.key_column = *key_index;
+        out.function = RPNElement::FUNCTION_MATCH;
+        out.bloom_filter = std::make_unique<BloomFilter>(params);
+
+        auto & value = const_value.get<String>();
+        String required_substring;
+        bool dummy_is_trivial, dummy_required_substring_is_prefix;
+        std::vector<String> alternatives;
+        OptimizedRegularExpression::analyze(value, required_substring, dummy_is_trivial, dummy_required_substring_is_prefix, alternatives);
+
+        if (required_substring.empty() && alternatives.empty())
+            return false;
+
+        /// out.set_bloom_filters means alternatives exist
+        /// out.bloom_filter means required_substring exists
+        if (!alternatives.empty())
+        {
+            std::vector<std::vector<BloomFilter>> bloom_filters;
+            bloom_filters.emplace_back();
+            for (const auto & alternative : alternatives)
+            {
+                bloom_filters.back().emplace_back(params);
+                token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
+            }
+            out.set_bloom_filters = std::move(bloom_filters);
+        }
+        else
+           token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
+
+        return true;
+    }
 
     return false;
 }
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
new file mode 100644
index 00000000000..41ca02e3877
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -0,0 +1,26 @@
+1	Hello ClickHouse
+2	Hello World
+1	Hello ClickHouse
+2	Hello World
+          Granules: 6/6
+          Granules: 2/6
+          Granules: 6/6
+          Granules: 2/6
+---
+1	Hello ClickHouse
+2	Hello World
+6	World Champion
+1	Hello ClickHouse
+2	Hello World
+6	World Champion
+          Granules: 6/6
+          Granules: 3/6
+          Granules: 6/6
+          Granules: 3/6
+---
+5	OLAP Database
+5	OLAP Database
+          Granules: 6/6
+          Granules: 1/6
+          Granules: 6/6
+          Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
new file mode 100644
index 00000000000..7378df41b8d
--- /dev/null
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -0,0 +1,107 @@
+DROP TABLE IF EXISTS tokenbf_tab;
+DROP TABLE IF EXISTS ngrambf_tab;
+
+CREATE TABLE tokenbf_tab
+(
+    id UInt32,
+    str String,
+    INDEX idx str TYPE tokenbf_v1(256, 2, 0)
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+
+CREATE TABLE ngrambf_tab
+(
+    id UInt32,
+    str String,
+    INDEX idx str TYPE ngrambf_v1(3, 256, 2, 0)
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 1;
+
+INSERT INTO tokenbf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
+INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
+
+SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
+
+-- Skip 4/6 granules
+-- Required string: 'Hello '
+-- Alternatives: 'Hello ClickHouse', 'Hello World'
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT '---';
+
+SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
+
+-- Skip 3/6 granules
+-- Required string: -
+-- Alternatives: 'ClickHouse', 'World'
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT '---';
+
+SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
+SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
+
+-- Skip 5/6 granules
+-- Required string: 'OLAP'
+-- Alternatives: -
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+  explain LIKE '%Granules: %';
+
+DROP TABLE tokenbf_tab;
+DROP TABLE ngrambf_tab;
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.reference b/tests/queries/0_stateless/02943_tokenbf_support_match.reference
deleted file mode 100644
index d02011eb2a1..00000000000
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.reference
+++ /dev/null
@@ -1,10 +0,0 @@
-      Granules: 5/5
-      Granules: 2/5
-
-
-      Granules: 5/5
-      Granules: 2/5
-
-
-      Granules: 5/5
-      Granules: 1/5
diff --git a/tests/queries/0_stateless/02943_tokenbf_support_match.sql b/tests/queries/0_stateless/02943_tokenbf_support_match.sql
deleted file mode 100644
index b48eb45c0d0..00000000000
--- a/tests/queries/0_stateless/02943_tokenbf_support_match.sql
+++ /dev/null
@@ -1,68 +0,0 @@
--- Tags: no-parallel
-
-DROP DATABASE IF EXISTS test_tokenbf_match;
-
-CREATE DATABASE test_tokenbf_match;
-
-CREATE TABLE test_tokenbf_match.test_tokenbf 
-(
-    `id` UInt32,
-    `str` String,
-    INDEX str_idx str TYPE tokenbf_v1(256, 2, 0) GRANULARITY 1
-)
-ENGINE = MergeTree
-ORDER BY id
-SETTINGS index_granularity = 1;
- 
-INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
-
---SKIP 3 GRANUS
---Required String: Hello
---Alternative String: Hello ClickHouse
---Alternative String: Hello World
-SELECT 
-  *
-FROM
-(
-    EXPLAIN indexes=1
-    SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)')
-)
-WHERE
-  explain like '%Granules%';
-
-
-SELECT '';
-SELECT '';
-
-
---SKIP 3 GRANUS
---No Required String
---Alternative String: ClickHouse
---Alternative String: World
-SELECT
-  *
-FROM
-(
-    EXPLAIN indexes = 1
-    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)')
-)
-WHERE
-  explain like '%Granules%';
-
-SELECT '';
-SELECT '';
-
---SKIP 4 GRANUS
---Required String: OLAP
---No Alternative String
-SELECT
-  *
-FROM
-(
-    EXPLAIN indexes = 1
-    SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*')
-)
-WHERE
-  explain like '%Granules%';
-
-DROP DATABASE IF EXISTS test_tokenbf_match;

From e71f6893cc96d63c829e6f4f61178c8367dd0063 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 18:01:11 +0100
Subject: [PATCH 016/105] Add brief comment for MergeTreeSequentialSource

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 5075e43448a..ba50447be0f 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -20,7 +20,9 @@ namespace ErrorCodes
 }
 
 
-/// Lightweight (in terms of logic) stream for reading single part from MergeTree
+/// Lightweight (in terms of logic) stream for reading single part from
+/// MergeTree, used for merges and mutations.
+///
 /// NOTE:
 ///  It doesn't filter out rows that are deleted with lightweight deletes.
 ///  Use createMergeTreeSequentialSource filter out those rows.

From 79de5c16c92fc93a2a428aa12b6530e16ff2a7f9 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 17:58:54 +0100
Subject: [PATCH 017/105] Apply all reader settings for merges/mutations

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index ba50447be0f..1e406358277 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -144,10 +144,14 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
         columns_for_reader = data_part->getColumns().addTypes(columns_to_read);
     }
 
-    ReadSettings read_settings;
+    const auto & context = storage.getContext();
+    ReadSettings read_settings = context->getReadSettings();
+    read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
+    /// It does not make sense to use pthread_threadpool for background merges/mutations
+    /// And also to preserve backward compatibility
+    read_settings.local_fs_method = LocalFSReadMethod::pread;
     if (read_with_direct_io)
         read_settings.direct_io_threshold = 1;
-    read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
 
     MergeTreeReaderSettings reader_settings =
     {

From 6ed9b53d1f9f570b19b7b36b7d872ff10f3bca7d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 19:11:59 +0100
Subject: [PATCH 018/105] Refactor test_throttling slightly for upcoming tests

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../{server_overrides.xml => dynamic_overrides.xml}       | 0
 .../configs/{server_backups.xml => static_overrides.xml}  | 0
 tests/integration/test_throttling/test.py                 | 8 ++++----
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename tests/integration/test_throttling/configs/{server_overrides.xml => dynamic_overrides.xml} (100%)
 rename tests/integration/test_throttling/configs/{server_backups.xml => static_overrides.xml} (100%)

diff --git a/tests/integration/test_throttling/configs/server_overrides.xml b/tests/integration/test_throttling/configs/dynamic_overrides.xml
similarity index 100%
rename from tests/integration/test_throttling/configs/server_overrides.xml
rename to tests/integration/test_throttling/configs/dynamic_overrides.xml
diff --git a/tests/integration/test_throttling/configs/server_backups.xml b/tests/integration/test_throttling/configs/static_overrides.xml
similarity index 100%
rename from tests/integration/test_throttling/configs/server_backups.xml
rename to tests/integration/test_throttling/configs/static_overrides.xml
diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index 04d02cc859d..31884fad88a 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -34,8 +34,8 @@ node = cluster.add_instance(
     "node",
     stay_alive=True,
     main_configs=[
-        "configs/server_backups.xml",
-        "configs/server_overrides.xml",
+        "configs/static_overrides.xml",
+        "configs/dynamic_overrides.xml",
         "configs/ssl.xml",
     ],
     user_configs=[
@@ -64,7 +64,7 @@ def revert_config():
         [
             "bash",
             "-c",
-            f"echo '<clickhouse></clickhouse>' > /etc/clickhouse-server/config.d/server_overrides.xml",
+            f"echo '<clickhouse></clickhouse>' > /etc/clickhouse-server/config.d/dynamic_overrides.xml",
         ]
     )
     node.exec_in_container(
@@ -96,7 +96,7 @@ def node_update_config(mode, setting, value=None):
     if mode is None:
         return
     if mode == "server":
-        config_path = "/etc/clickhouse-server/config.d/server_overrides.xml"
+        config_path = "/etc/clickhouse-server/config.d/dynamic_overrides.xml"
         config_content = f"""
         <clickhouse><{setting}>{value}</{setting}></clickhouse>
         """

From 837f4ea676665cda6383fb3b3e3ed04d8560ba76 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Thu, 14 Dec 2023 18:30:11 +0100
Subject: [PATCH 019/105] Add ability to throttle merges/mutations

The main motivation was to have the ability to throttle background tasks,
to avoid affecting queries.

Two new server settings have been added for this:
- max_mutations_bandwidth_for_server
- max_merges_bandwidth_for_server

Note that they limit only reading, since usually you will not write
more data than you read, although that is possible in the case of ALTER
UPDATE.

But for now, to keep things simple, I decided to limit this with only
2 settings instead of 4.

Note that if write throttling is needed, it can use the same
settings and just create a new throttler for writes.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Core/ServerSettings.h                     |  2 ++
 src/Interpreters/Context.cpp                  | 19 ++++++++++++
 src/Interpreters/Context.h                    |  3 ++
 src/Interpreters/MutationsInterpreter.cpp     |  1 +
 src/Storages/MergeTree/MergeTask.cpp          |  2 ++
 .../MergeTree/MergeTreeSequentialSource.cpp   | 24 +++++++++++++--
 .../MergeTree/MergeTreeSequentialSource.h     |  8 +++++
 .../configs/static_overrides.xml              |  3 ++
 tests/integration/test_throttling/test.py     | 29 +++++++++++++++++++
 9 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 85e3d33f80b..310b3585eab 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -26,6 +26,8 @@ namespace DB
     M(UInt64, max_active_parts_loading_thread_pool_size, 64, "The number of threads to load active set of data parts (Active ones) at startup.", 0) \
     M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The number of threads to load inactive set of data parts (Outdated ones) at startup.", 0) \
     M(UInt64, max_parts_cleaning_thread_pool_size, 128, "The number of threads for concurrent removal of inactive data parts.", 0) \
+    M(UInt64, max_mutations_bandwidth_for_server, 0, "The maximum read speed of all mutations on server in bytes per second. Zero means unlimited.", 0) \
+    M(UInt64, max_merges_bandwidth_for_server, 0, "The maximum read speed of all merges on server in bytes per second. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
     M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 589d03cc074..746c7706eb4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -325,6 +325,9 @@ struct ContextSharedPart : boost::noncopyable
 
     mutable ThrottlerPtr backups_server_throttler;          /// A server-wide throttler for BACKUPs
 
+    mutable ThrottlerPtr mutations_throttler;               /// A server-wide throttler for mutations
+    mutable ThrottlerPtr merges_throttler;                  /// A server-wide throttler for merges
+
     MultiVersion<Macros> macros;                            /// Substitutions extracted from config.
     std::unique_ptr<DDLWorker> ddl_worker TSA_GUARDED_BY(mutex); /// Process ddl commands from zk.
     LoadTaskPtr ddl_worker_startup_task;                         /// To postpone `ddl_worker->startup()` after all tables startup
@@ -733,6 +736,12 @@ struct ContextSharedPart : boost::noncopyable
 
         if (auto bandwidth = server_settings.max_backup_bandwidth_for_server)
             backups_server_throttler = std::make_shared<Throttler>(bandwidth);
+
+        if (auto bandwidth = server_settings.max_mutations_bandwidth_for_server)
+            mutations_throttler = std::make_shared<Throttler>(bandwidth);
+
+        if (auto bandwidth = server_settings.max_merges_bandwidth_for_server)
+            merges_throttler = std::make_shared<Throttler>(bandwidth);
     }
 };
 
@@ -2994,6 +3003,16 @@ ThrottlerPtr Context::getBackupsThrottler() const
     return throttler;
 }
 
+ThrottlerPtr Context::getMutationsThrottler() const
+{
+    return shared->mutations_throttler;
+}
+
+ThrottlerPtr Context::getMergesThrottler() const
+{
+    return shared->merges_throttler;
+}
+
 bool Context::hasDistributedDDL() const
 {
     return getConfigRef().has("distributed_ddl");
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 39d2212ce80..a7ff7c270bc 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -1324,6 +1324,9 @@ public:
 
     ThrottlerPtr getBackupsThrottler() const;
 
+    ThrottlerPtr getMutationsThrottler() const;
+    ThrottlerPtr getMergesThrottler() const;
+
     /// Kitchen sink
     using ContextData::KitchenSink;
     using ContextData::kitchen_sink;
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index bf50766c165..a6ea03f8a03 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -1280,6 +1280,7 @@ void MutationsInterpreter::Source::read(
         VirtualColumns virtual_columns(std::move(required_columns), part);
 
         createReadFromPartStep(
+            MergeTreeSequentialSourceType::Mutation,
             plan, *data, storage_snapshot, part,
             std::move(virtual_columns.columns_to_read),
             apply_deleted_mask_, filter, context_,
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 8b5e9ba96ee..5592ffd57dc 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -566,6 +566,7 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
     for (size_t part_num = 0; part_num < global_ctx->future_part->parts.size(); ++part_num)
     {
         Pipe pipe = createMergeTreeSequentialSource(
+            MergeTreeSequentialSourceType::Merge,
             *global_ctx->data,
             global_ctx->storage_snapshot,
             global_ctx->future_part->parts[part_num],
@@ -920,6 +921,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
     for (const auto & part : global_ctx->future_part->parts)
     {
         Pipe pipe = createMergeTreeSequentialSource(
+            MergeTreeSequentialSourceType::Merge,
             *global_ctx->data,
             global_ctx->storage_snapshot,
             part,
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 1e406358277..85dbbf87515 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -30,6 +30,7 @@ class MergeTreeSequentialSource : public ISource
 {
 public:
     MergeTreeSequentialSource(
+        MergeTreeSequentialSourceType type,
         const MergeTreeData & storage_,
         const StorageSnapshotPtr & storage_snapshot_,
         MergeTreeData::DataPartPtr data_part_,
@@ -85,6 +86,7 @@ private:
 
 
 MergeTreeSequentialSource::MergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage_,
     const StorageSnapshotPtr & storage_snapshot_,
     MergeTreeData::DataPartPtr data_part_,
@@ -152,6 +154,17 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
     read_settings.local_fs_method = LocalFSReadMethod::pread;
     if (read_with_direct_io)
         read_settings.direct_io_threshold = 1;
+    /// Configure throttling
+    switch (type)
+    {
+        case Mutation:
+            read_settings.local_throttler = context->getMutationsThrottler();
+            break;
+        case Merge:
+            read_settings.local_throttler = context->getMergesThrottler();
+            break;
+    }
+    read_settings.remote_throttler = read_settings.local_throttler;
 
     MergeTreeReaderSettings reader_settings =
     {
@@ -244,6 +257,7 @@ MergeTreeSequentialSource::~MergeTreeSequentialSource() = default;
 
 
 Pipe createMergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
     MergeTreeData::DataPartPtr data_part,
@@ -264,7 +278,7 @@ Pipe createMergeTreeSequentialSource(
     if (need_to_filter_deleted_rows && !has_filter_column)
         columns_to_read.emplace_back(filter_column.name);
 
-    auto column_part_source = std::make_shared<MergeTreeSequentialSource>(
+    auto column_part_source = std::make_shared<MergeTreeSequentialSource>(type,
         storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges),
         /*apply_deleted_mask=*/ false, read_with_direct_io, take_column_types_from_storage, quiet);
 
@@ -292,6 +306,7 @@ class ReadFromPart final : public ISourceStep
 {
 public:
     ReadFromPart(
+        MergeTreeSequentialSourceType type_,
         const MergeTreeData & storage_,
         const StorageSnapshotPtr & storage_snapshot_,
         MergeTreeData::DataPartPtr data_part_,
@@ -301,6 +316,7 @@ public:
         ContextPtr context_,
         Poco::Logger * log_)
         : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)})
+        , type(type_)
         , storage(storage_)
         , storage_snapshot(storage_snapshot_)
         , data_part(std::move(data_part_))
@@ -337,7 +353,7 @@ public:
             }
         }
 
-        auto source = createMergeTreeSequentialSource(
+        auto source = createMergeTreeSequentialSource(type,
             storage,
             storage_snapshot,
             data_part,
@@ -353,6 +369,7 @@ public:
     }
 
 private:
+    MergeTreeSequentialSourceType type;
     const MergeTreeData & storage;
     StorageSnapshotPtr storage_snapshot;
     MergeTreeData::DataPartPtr data_part;
@@ -364,6 +381,7 @@ private:
 };
 
 void createReadFromPartStep(
+    MergeTreeSequentialSourceType type,
     QueryPlan & plan,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
@@ -374,7 +392,7 @@ void createReadFromPartStep(
     ContextPtr context,
     Poco::Logger * log)
 {
-    auto reading = std::make_unique<ReadFromPart>(
+    auto reading = std::make_unique<ReadFromPart>(type,
         storage, storage_snapshot, std::move(data_part),
         std::move(columns_to_read), apply_deleted_mask,
         filter, std::move(context), log);
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h
index 396d3f76886..41def48aab6 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.h
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h
@@ -8,9 +8,16 @@
 namespace DB
 {
 
+enum MergeTreeSequentialSourceType
+{
+    Mutation,
+    Merge,
+};
+
 /// Create stream for reading single part from MergeTree.
 /// If the part has lightweight delete mask then the deleted rows are filtered out.
 Pipe createMergeTreeSequentialSource(
+    MergeTreeSequentialSourceType type,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
     MergeTreeData::DataPartPtr data_part,
@@ -25,6 +32,7 @@ Pipe createMergeTreeSequentialSource(
 class QueryPlan;
 
 void createReadFromPartStep(
+    MergeTreeSequentialSourceType type,
     QueryPlan & plan,
     const MergeTreeData & storage,
     const StorageSnapshotPtr & storage_snapshot,
diff --git a/tests/integration/test_throttling/configs/static_overrides.xml b/tests/integration/test_throttling/configs/static_overrides.xml
index a8c43f8beaf..9f3bad2f882 100644
--- a/tests/integration/test_throttling/configs/static_overrides.xml
+++ b/tests/integration/test_throttling/configs/static_overrides.xml
@@ -31,4 +31,7 @@
         <allowed_disk>default</allowed_disk>
         <allowed_path>/backups/</allowed_path>
     </backups>
+
+    <max_mutations_bandwidth_for_server>1000000</max_mutations_bandwidth_for_server> <!-- 1M -->
+    <max_merges_bandwidth_for_server>1000000</max_merges_bandwidth_for_server> <!-- 1M -->
 </clickhouse>
diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py
index 31884fad88a..c53c2bb1ddf 100644
--- a/tests/integration/test_throttling/test.py
+++ b/tests/integration/test_throttling/test.py
@@ -430,3 +430,32 @@ def test_write_throttling(policy, mode, setting, value, should_took):
     )
     _, took = elapsed(node.query, f"insert into data select * from numbers(1e6)")
     assert_took(took, should_took)
+
+
+def test_max_mutations_bandwidth_for_server():
+    node.query(
+        """
+        drop table if exists data;
+        create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9;
+    """
+    )
+    node.query("insert into data select * from numbers(1e6)")
+    _, took = elapsed(
+        node.query,
+        "alter table data update key = -key where 1 settings mutations_sync = 1",
+    )
+    # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds
+    assert_took(took, 7)
+
+
+def test_max_merges_bandwidth_for_server():
+    node.query(
+        """
+        drop table if exists data;
+        create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9;
+    """
+    )
+    node.query("insert into data select * from numbers(1e6)")
+    _, took = elapsed(node.query, "optimize table data final")
+    # reading 1e6*8 bytes with 1M/s bandwidth should take (8-1)/1=7 seconds
+    assert_took(took, 7)

From 83f4b7defb0d8acd10fd25d8c1aff7704140801f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=87=8C=E6=B6=9B?= <lingtaolf@gmail.com>
Date: Tue, 26 Dec 2023 15:00:44 +0800
Subject: [PATCH 020/105] rebase master

---
 ...f_indexes_support_match_function.reference | 24 +++++++++----------
 ...ngrambf_indexes_support_match_function.sql |  2 ++
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
index 41ca02e3877..5c6a213a03f 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -2,10 +2,10 @@
 2	Hello World
 1	Hello ClickHouse
 2	Hello World
-          Granules: 6/6
-          Granules: 2/6
-          Granules: 6/6
-          Granules: 2/6
+            Granules: 6/6
+            Granules: 2/6
+            Granules: 6/6
+            Granules: 2/6
 ---
 1	Hello ClickHouse
 2	Hello World
@@ -13,14 +13,14 @@
 1	Hello ClickHouse
 2	Hello World
 6	World Champion
-          Granules: 6/6
-          Granules: 3/6
-          Granules: 6/6
-          Granules: 3/6
+            Granules: 6/6
+            Granules: 3/6
+            Granules: 6/6
+            Granules: 3/6
 ---
 5	OLAP Database
 5	OLAP Database
-          Granules: 6/6
-          Granules: 1/6
-          Granules: 6/6
-          Granules: 1/6
+            Granules: 6/6
+            Granules: 1/6
+            Granules: 6/6
+            Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
index 7378df41b8d..df39be8abd6 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -1,3 +1,4 @@
+SET allow_experimental_analyzer = 1;
 DROP TABLE IF EXISTS tokenbf_tab;
 DROP TABLE IF EXISTS ngrambf_tab;
 
@@ -85,6 +86,7 @@ SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 -- Required string: 'OLAP'
 -- Alternatives: -
 
+set allow_experimental_analyzer = 1;
 SELECT *
 FROM
 (

From 547e3ed6c04a400222c7cab6205a1912c7d41760 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Tue, 26 Dec 2023 22:57:31 +0000
Subject: [PATCH 021/105] Add a check for the 'host_name' parameter

---
 src/Interpreters/DDLTask.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 0164f5668a2..d386ab9a91d 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -220,7 +220,20 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     bool host_in_hostlist = false;
     std::exception_ptr first_exception = nullptr;
 
-    auto maybe_secure_port = global_context->getTCPPortSecure();
+    const auto maybe_secure_port = global_context->getTCPPortSecure();
+    const auto port = global_context->getTCPPort()
+
+    if (config_host_name)
+    {
+        bool is_local_port = (maybe_secure_port && HostID(*config_host_name, *maybe_secure_port).isLocalAddress(*maybe_secure_port)) ||
+                             HostID(*config_host_name, port).isLocalAddress(port);
+
+        if (!is_local_port)
+            throw Exception(
+                ErrorCodes::DNS_ERROR,
+                "{} is not a local adress. Check parameter 'host_name' in the configuration",
+                *config_host_name)
+    }
 
     for (const HostID & host : entry.hosts)
     {
@@ -229,7 +242,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
             if (config_host_name != host.host_name)
                 continue;
 
-            if (maybe_secure_port != host.port && global_context->getTCPPort() != host.port)
+            if (maybe_secure_port != host.port && port != host.port)
                 continue;
 
             host_in_hostlist = true;
@@ -242,7 +255,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
         {
             /// The port is considered local if it matches TCP or TCP secure port that the server is listening.
             bool is_local_port
-                = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port)) || host.isLocalAddress(global_context->getTCPPort());
+                = (maybe_secure_port && host.isLocalAddress(*maybe_secure_port)) || host.isLocalAddress(port);
 
             if (!is_local_port)
                 continue;

From 3140f869cc05692d6c665b3525efb8b5cd8f0f16 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Wed, 27 Dec 2023 17:15:44 +0100
Subject: [PATCH 022/105] Fix typo

---
 src/Interpreters/DDLTask.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index d386ab9a91d..e7796c5d3a5 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -231,7 +231,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
         if (!is_local_port)
             throw Exception(
                 ErrorCodes::DNS_ERROR,
-                "{} is not a local adress. Check parameter 'host_name' in the configuration",
+                "{} is not a local address. Check parameter 'host_name' in the configuration",
                 *config_host_name)
     }
 

From 87eb18eb748f58e3cfbfe96d03124f3b0e04b7ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 28 Dec 2023 01:02:03 +0100
Subject: [PATCH 023/105] Speed up numbers table function

---
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 36 +++++++++++++++----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 41690c1b132..a88203e0fca 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -9,6 +9,7 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Storages/System/StorageSystemNumbers.h>
+#include <Common/TargetSpecific.h>
 #include <Common/typeid_cast.h>
 
 namespace DB
@@ -22,6 +23,27 @@ extern const int TOO_MANY_ROWS;
 namespace
 {
 
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(void),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((UInt64 * begin, UInt64 count, UInt64 first_value)
+    {
+        for (UInt64 i = 0; i < count; i++)
+            *(begin + i) = first_value + i;
+    })
+)
+
+static void iota(UInt64 * begin, UInt64 count, UInt64 first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
 class NumbersSource : public ISource
 {
 public:
@@ -43,8 +65,7 @@ protected:
         size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
-        while (pos < end)
-            *pos++ = curr++;
+        iota(pos, end - pos, curr);
 
         next += step;
 
@@ -211,17 +232,18 @@ protected:
                 {
                     auto start_value_64 = static_cast<UInt64>(start_value);
                     auto end_value_64 = static_cast<UInt64>(end_value);
-                    while (start_value_64 < end_value_64)
-                        *(pos++) = start_value_64++;
+                    auto size = end_value_64 - start_value_64;
+                    iota(pos, size, start_value_64);
+                    pos += size;
                 }
             };
 
             if (can_provide > need)
             {
                 UInt64 start_value = first_value(range) + cursor.offset_in_range;
-                UInt64 end_value = start_value + need; /// end_value will never overflow
-                while (start_value < end_value)
-                    *(pos++) = start_value++;
+                /// end_value will never overflow
+                iota(pos, need, start_value);
+                pos += need;
 
                 provided += need;
                 cursor.offset_in_range += need;

From 146de5b220d57fa53f1f9bf2c66742202140d807 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 5 Dec 2023 12:28:23 +0000
Subject: [PATCH 024/105] Ignore MVs with dropped target table during pushing
 to views

---
 .../Transforms/buildPushingToViewsChain.cpp   |  6 +++++-
 ...ropped_target_table_no_exception.reference |  4 ++++
 ...with_dropped_target_table_no_exception.sql | 20 +++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
 create mode 100644 tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql

diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp
index f85dc28f4c7..b8aafe305a8 100644
--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@@ -316,7 +316,11 @@ Chain buildPushingToViewsChain(
             type = QueryViewsLogElement::ViewType::MATERIALIZED;
             result_chain.addTableLock(lock);
 
-            StoragePtr inner_table = materialized_view->getTargetTable();
+            StoragePtr inner_table = materialized_view->tryGetTargetTable();
+            /// If target table was dropped, ignore this materialized view.
+            if (!inner_table)
+                continue;
+
             auto inner_table_id = inner_table->getStorageID();
             auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
 
diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
new file mode 100644
index 00000000000..8fb8a08e3f9
--- /dev/null
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.reference
@@ -0,0 +1,4 @@
+42
+42
+42
+42
diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
new file mode 100644
index 00000000000..744b2578617
--- /dev/null
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
@@ -0,0 +1,20 @@
+drop table if exists from_table;
+drop table if exists to_table;
+drop table if exists mv;
+
+create table from_table (x UInt32) engine=MergeTree order by x;
+create table to_table (x UInt32) engine=MergeTree order by x;
+create materialized view mv to to_table as select * from from_table;
+
+insert into from_table select 42;
+select * from from_table;
+select * from to_table;
+
+drop table to_table;
+
+insert into from_table select 42;
+select * from from_table;
+
+drop table from_table;
+drop view mv;
+

From e66701dd101da0f446eb9b5b52a9aa48aef42a89 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 28 Dec 2023 15:00:39 +0000
Subject: [PATCH 025/105] Add setting
 ignore_materialized_views_with_dropped_target_table

---
 src/Core/Settings.h                                 |  1 +
 .../Transforms/buildPushingToViewsChain.cpp         | 13 ++++++++++++-
 src/Storages/StorageMaterializedView.h              |  1 +
 ..._race_condition_between_insert_and_droppin_mv.sh |  2 +-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d96b1b9fc10..9e485d88772 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -584,6 +584,7 @@ class IColumn;
     M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
     M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
     M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
+    M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
     M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
     M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp
index b8aafe305a8..ab9b3a80f12 100644
--- a/src/Processors/Transforms/buildPushingToViewsChain.cpp
+++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp
@@ -39,6 +39,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int UNKNOWN_TABLE;
 }
 
 ThreadStatusesHolder::~ThreadStatusesHolder()
@@ -319,7 +320,17 @@ Chain buildPushingToViewsChain(
             StoragePtr inner_table = materialized_view->tryGetTargetTable();
             /// If target table was dropped, ignore this materialized view.
             if (!inner_table)
-                continue;
+            {
+                if (context->getSettingsRef().ignore_materialized_views_with_dropped_target_table)
+                    continue;
+
+                throw Exception(
+                    ErrorCodes::UNKNOWN_TABLE,
+                    "Target table '{}' of view '{}' doesn't exists. To ignore this view use setting "
+                    "ignore_materialized_views_with_dropped_target_table",
+                    materialized_view->getTargetTableId().getFullTableName(),
+                    view_id.getFullTableName());
+            }
 
             auto inner_table_id = inner_table->getStorageID();
             auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index f37abdfb1a3..8d7f3e5a9a7 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -71,6 +71,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
+    const StorageID & getTargetTableId() const { return target_table_id; }
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;
diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
index 9ce4b459fce..6899b31d1d9 100755
--- a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
+++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh
@@ -14,7 +14,7 @@ function insert {
     offset=500
     while true;
     do
-        ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_race_condition_landing SELECT number, toString(number), toString(number) from system.numbers limit $i, $offset"
+        ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_race_condition_landing SELECT number, toString(number), toString(number) from system.numbers limit $i, $offset settings ignore_materialized_views_with_dropped_target_table=1"
         i=$(( $i + $RANDOM % 100 + 400 ))
     done
 }

From 0faf784d2f39e25396dba32a1667814fdce7f850 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Thu, 28 Dec 2023 16:59:57 +0000
Subject: [PATCH 026/105] Add a test for alias in USING clause

---
 .../02955_analyzer_using_functional_args.reference    |  1 +
 .../02955_analyzer_using_functional_args.sql          | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
 create mode 100644 tests/queries/0_stateless/02955_analyzer_using_functional_args.sql

diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
new file mode 100644
index 00000000000..e4c1fd86b09
--- /dev/null
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
@@ -0,0 +1,11 @@
+CREATE TABLE t1 (x Int16, y ALIAS x + x * 2) ENGINE=MergeTree() ORDER BY x;
+CREATE TABLE t2 (y Int16, z Int16) ENGINE=MergeTree() ORDER BY y;
+
+INSERT INTO t1 VALUES (1231), (123);
+INSERT INTO t2 VALUES (6666, 48);
+INSERT INTO t2 VALUES (369, 50);
+
+SELECT count() FROM t1 INNER JOIN t2 USING (y);
+
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;

From 8a90f12dc9974c543d65a72653b565758c7e128c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 28 Dec 2023 23:28:26 +0100
Subject: [PATCH 027/105] Speedup MIN/MAX for non numeric types

---
 .../AggregateFunctionMax.cpp                  | 83 ++++++++++++++++++-
 .../AggregateFunctionMin.cpp                  | 83 ++++++++++++++++++-
 2 files changed, 158 insertions(+), 8 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index e74224a24c3..a440aedb62c 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -19,7 +19,7 @@ public:
     explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { }
 
     /// Specializations for native numeric types
-    ALWAYS_INLINE inline void addBatchSinglePlace(
+    void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -27,7 +27,7 @@ public:
         Arena * arena,
         ssize_t if_argument_pos) const override;
 
-    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
+    void addBatchSinglePlaceNotNull(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -74,7 +74,50 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin;
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while (index < row_end && if_flags[index] == 0) /// Check the bound first so if_flags[row_end] is never read
+            index++;
+        if (index >= row_end)
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena);
+    }
+    else
+    {
+        if (row_begin >= row_end)
+            return;
+
+        /// TODO: Introduce row_begin and row_end to getPermutation
+        if (row_begin != 0 || row_end != column.size())
+        {
+            size_t index = row_begin;
+            for (size_t i = index + 1; i < row_end; i++)
+            {
+                if (column.compareAt(i, index, column, nan_direction_hint) > 0)
+                    index = i;
+            }
+            this->data(place).changeIfGreater(column, index, arena);
+        }
+        else
+        {
+            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Descending;
+            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
+            IColumn::Permutation permutation;
+            constexpr UInt64 limit = 1;
+            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            this->data(place).changeIfGreater(column, permutation[0], arena);
+        }
+    }
 }
 
 // NOLINTBEGIN(bugprone-macro-parentheses)
@@ -119,7 +162,39 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin;
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0)) /// Bound check first: never read past row_end
+            index++;
+        if (index >= row_end)
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena);
+    }
+    else
+    {
+        size_t index = row_begin;
+        while ((index < row_end) && (null_map[index] != 0)) /// Bound check first: never read past row_end
+            index++;
+        if (index >= row_end)
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) > 0))
+                index = i;
+        }
+        this->data(place).changeIfGreater(column, index, arena);
+    }
 }
 
 AggregateFunctionPtr createAggregateFunctionMax(
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 48758aa74b0..8d5d12fa626 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -20,7 +20,7 @@ public:
     explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { }
 
     /// Specializations for native numeric types
-    ALWAYS_INLINE inline void addBatchSinglePlace(
+    void addBatchSinglePlace(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -28,7 +28,7 @@ public:
         Arena * arena,
         ssize_t if_argument_pos) const override;
 
-    ALWAYS_INLINE inline void addBatchSinglePlaceNotNull(
+    void addBatchSinglePlaceNotNull(
         size_t row_begin,
         size_t row_end,
         AggregateDataPtr __restrict place,
@@ -75,7 +75,50 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin;
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while (index < row_end && if_flags[index] == 0) /// Check the bound first so if_flags[row_end] is never read
+            index++;
+        if (index >= row_end)
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((if_flags[i] != 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena);
+    }
+    else
+    {
+        if (row_begin >= row_end)
+            return;
+
+        /// TODO: Introduce row_begin and row_end to getPermutation
+        if (row_begin != 0 || row_end != column.size())
+        {
+            size_t index = row_begin;
+            for (size_t i = index + 1; i < row_end; i++)
+            {
+                if (column.compareAt(i, index, column, nan_direction_hint) < 0)
+                    index = i;
+            }
+            this->data(place).changeIfLess(column, index, arena);
+        }
+        else
+        {
+            constexpr IColumn::PermutationSortDirection direction = IColumn::PermutationSortDirection::Ascending;
+            constexpr IColumn::PermutationSortStability stability = IColumn::PermutationSortStability::Unstable;
+            IColumn::Permutation permutation;
+            constexpr UInt64 limit = 1;
+            column.getPermutation(direction, stability, limit, nan_direction_hint, permutation);
+            this->data(place).changeIfLess(column, permutation[0], arena);
+        }
+    }
 }
 
 // NOLINTBEGIN(bugprone-macro-parentheses)
@@ -120,7 +163,39 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    constexpr int nan_direction_hint = 1;
+    auto const & column = *columns[0];
+    if (if_argument_pos >= 0)
+    {
+        size_t index = row_begin;
+        const auto & if_flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
+        while ((index < row_end) && (if_flags[index] == 0 || null_map[index] != 0)) /// Bound check first: never read past row_end
+            index++;
+        if (index >= row_end)
+            return;
+
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            /// The candidate row `i` must be non-NULL; `index` is already known to be non-NULL.
+            if ((if_flags[i] != 0) && (null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena);
+    }
+    else
+    {
+        size_t index = row_begin;
+        while ((index < row_end) && (null_map[index] != 0)) /// Bound check first: never read past row_end
+            index++;
+        if (index >= row_end)
+            return;
+        for (size_t i = index + 1; i < row_end; i++)
+        {
+            if ((null_map[i] == 0) && (column.compareAt(i, index, column, nan_direction_hint) < 0))
+                index = i;
+        }
+        this->data(place).changeIfLess(column, index, arena);
+    }
 }
 
 AggregateFunctionPtr createAggregateFunctionMin(

From e692b0a5bda00d14e109aeee1f1045b553183b10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 13:46:01 +0100
Subject: [PATCH 028/105] Move iota implementation to its own file

---
 src/Common/iota.cpp                           |  6 +++
 src/Common/iota.h                             | 42 +++++++++++++++++++
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 29 ++-----------
 3 files changed, 52 insertions(+), 25 deletions(-)
 create mode 100644 src/Common/iota.cpp
 create mode 100644 src/Common/iota.h

diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
new file mode 100644
index 00000000000..7c0d28a66e0
--- /dev/null
+++ b/src/Common/iota.cpp
@@ -0,0 +1,6 @@
+#include <Common/iota.h>
+
+namespace DB
+{
+template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+}
diff --git a/src/Common/iota.h b/src/Common/iota.h
new file mode 100644
index 00000000000..d992032b77c
--- /dev/null
+++ b/src/Common/iota.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <base/defines.h>
+#include <base/types.h>
+#include <Common/Concepts.h>
+#include <Common/TargetSpecific.h>
+
+/// This is a replacement for std::iota to use dynamic dispatch
+/// Note that is only defined for containers with contiguous memory only
+
+namespace DB
+{
+
+/// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
+template <typename T>
+concept iota_supported_types = (is_any_of<T, UInt64>);
+
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
+    {
+        for (size_t i = 0; i < count; i++)
+            *(begin + i) = first_value + i;
+    })
+)
+
+template <iota_supported_types T>
+void iota(T * begin, size_t count, T first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
+extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+
+}
diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index a88203e0fca..329497d66d3 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -9,7 +9,7 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Storages/System/StorageSystemNumbers.h>
-#include <Common/TargetSpecific.h>
+#include <Common/iota.h>
 #include <Common/typeid_cast.h>
 
 namespace DB
@@ -23,27 +23,6 @@ extern const int TOO_MANY_ROWS;
 namespace
 {
 
-MULTITARGET_FUNCTION_AVX2_SSE42(
-    MULTITARGET_FUNCTION_HEADER(void),
-    iotaImpl, MULTITARGET_FUNCTION_BODY((UInt64 * begin, UInt64 count, UInt64 first_value)
-    {
-        for (UInt64 i = 0; i < count; i++)
-            *(begin + i) = first_value + i;
-    })
-)
-
-static void iota(UInt64 * begin, UInt64 count, UInt64 first_value)
-{
-#if USE_MULTITARGET_CODE
-    if (isArchSupported(TargetArch::AVX2))
-        return iotaImplAVX2(begin, count, first_value);
-
-    if (isArchSupported(TargetArch::SSE42))
-        return iotaImplSSE42(begin, count, first_value);
-#endif
-    return iotaImpl(begin, count, first_value);
-}
-
 class NumbersSource : public ISource
 {
 public:
@@ -65,7 +44,7 @@ protected:
         size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
-        iota(pos, end - pos, curr);
+        iota(pos, static_cast<size_t>(end - pos), curr);
 
         next += step;
 
@@ -233,7 +212,7 @@ protected:
                     auto start_value_64 = static_cast<UInt64>(start_value);
                     auto end_value_64 = static_cast<UInt64>(end_value);
                     auto size = end_value_64 - start_value_64;
-                    iota(pos, size, start_value_64);
+                    iota(pos, static_cast<size_t>(size), start_value_64);
                     pos += size;
                 }
             };
@@ -242,7 +221,7 @@ protected:
             {
                 UInt64 start_value = first_value(range) + cursor.offset_in_range;
                 /// end_value will never overflow
-                iota(pos, need, start_value);
+                iota(pos, static_cast<size_t>(need), start_value);
                 pos += need;
 
                 provided += need;

From bda6104f84bdfce53115a728cd2e9d2f3251bc66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:38:22 +0100
Subject: [PATCH 029/105] Replace std::iota with DB::iota where possible

---
 ...ateFunctionLargestTriangleThreeBuckets.cpp |  5 ++--
 src/AggregateFunctions/StatCommon.h           |  3 +-
 src/Analyzer/Passes/FuseFunctionsPass.cpp     |  3 +-
 src/Columns/ColumnObject.cpp                  |  3 +-
 src/Columns/tests/gtest_column_sparse.cpp     |  3 +-
 src/Common/iota.cpp                           | 27 ++++++++++++++++++
 src/Common/iota.h                             | 28 +++----------------
 src/Common/tests/gtest_hash_table.cpp         |  3 +-
 .../HashedDictionaryParallelLoader.h          |  3 +-
 src/Dictionaries/PolygonDictionary.cpp        |  3 +-
 src/Dictionaries/PolygonDictionaryUtils.h     |  3 +-
 src/Functions/array/arrayRandomSample.cpp     |  3 +-
 src/Functions/array/arrayShuffle.cpp          |  3 +-
 src/Functions/translate.cpp                   |  5 ++--
 src/Interpreters/tests/gtest_filecache.cpp    |  3 +-
 15 files changed, 59 insertions(+), 39 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
index 850a7c688ad..d5abdbc12fb 100644
--- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
+++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp
@@ -14,8 +14,9 @@
 #include <DataTypes/DataTypesDecimal.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
-#include <Common/PODArray.h>
 #include <Common/assert_cast.h>
+#include <Common/PODArray.h>
+#include <Common/iota.h>
 #include <base/types.h>
 
 #include <boost/math/distributions/normal.hpp>
@@ -48,7 +49,7 @@ struct LargestTriangleThreeBucketsData : public StatisticalSample<Float64, Float
         // sort the this->x and this->y in ascending order of this->x using index
         std::vector<size_t> index(this->x.size());
 
-        std::iota(index.begin(), index.end(), 0);
+        iota(index.data(), index.size(), size_t(0));
         ::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; });
 
         SampleX temp_x{};
diff --git a/src/AggregateFunctions/StatCommon.h b/src/AggregateFunctions/StatCommon.h
index 23054e25189..8b1395ea95c 100644
--- a/src/AggregateFunctions/StatCommon.h
+++ b/src/AggregateFunctions/StatCommon.h
@@ -7,6 +7,7 @@
 #include <base/sort.h>
 
 #include <Common/ArenaAllocator.h>
+#include <Common/iota.h>
 
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
@@ -30,7 +31,7 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
     const size_t size = values.size();
     /// Save initial positions, than sort indices according to the values.
     std::vector<size_t> indexes(size);
-    std::iota(indexes.begin(), indexes.end(), 0);
+    iota(indexes.data(), indexes.size(), size_t(0));
     std::sort(indexes.begin(), indexes.end(),
         [&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
 
diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp
index e77b3ddcb20..443e13b7d9d 100644
--- a/src/Analyzer/Passes/FuseFunctionsPass.cpp
+++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp
@@ -1,5 +1,6 @@
 #include <Analyzer/Passes/FuseFunctionsPass.h>
 
+#include <Common/iota.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeTuple.h>
@@ -184,7 +185,7 @@ FunctionNodePtr createFusedQuantilesNode(std::vector<QueryTreeNodePtr *> & nodes
     {
         /// Sort nodes and parameters in ascending order of quantile level
         std::vector<size_t> permutation(nodes.size());
-        std::iota(permutation.begin(), permutation.end(), 0);
+        iota(permutation.data(), permutation.size(), size_t(0));
         std::sort(permutation.begin(), permutation.end(), [&](size_t i, size_t j) { return parameters[i].get<Float64>() < parameters[j].get<Float64>(); });
 
         std::vector<QueryTreeNodePtr *> new_nodes;
diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index 2052ec3c968..f7176568a1b 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -2,6 +2,7 @@
 #include <Columns/ColumnObject.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnArray.h>
+#include <Common/iota.h>
 #include <DataTypes/ObjectUtils.h>
 #include <DataTypes/getLeastSupertype.h>
 #include <DataTypes/DataTypeNothing.h>
@@ -838,7 +839,7 @@ MutableColumnPtr ColumnObject::cloneResized(size_t new_size) const
 void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const
 {
     res.resize(num_rows);
-    std::iota(res.begin(), res.end(), 0);
+    iota(res.data(), res.size(), size_t(0));
 }
 
 void ColumnObject::compareColumn(const IColumn & rhs, size_t rhs_row_num,
diff --git a/src/Columns/tests/gtest_column_sparse.cpp b/src/Columns/tests/gtest_column_sparse.cpp
index c3450ff91b4..02b15a2f5c4 100644
--- a/src/Columns/tests/gtest_column_sparse.cpp
+++ b/src/Columns/tests/gtest_column_sparse.cpp
@@ -1,6 +1,7 @@
 #include <Columns/ColumnSparse.h>
 #include <Columns/ColumnsNumber.h>
 
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <pcg_random.hpp>
 #include <gtest/gtest.h>
@@ -191,7 +192,7 @@ TEST(ColumnSparse, Permute)
         auto [sparse_src, full_src] = createColumns(n, k);
 
         IColumn::Permutation perm(n);
-        std::iota(perm.begin(), perm.end(), 0);
+        iota(perm.data(), perm.size(), size_t(0));
         std::shuffle(perm.begin(), perm.end(), rng);
 
         auto sparse_dst = sparse_src->permute(perm, limit);
diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
index 7c0d28a66e0..385d3b22207 100644
--- a/src/Common/iota.cpp
+++ b/src/Common/iota.cpp
@@ -1,6 +1,33 @@
+#include <base/defines.h>
 #include <Common/iota.h>
+#include <Common/TargetSpecific.h>
 
 namespace DB
 {
+
+MULTITARGET_FUNCTION_AVX2_SSE42(
+    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
+    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
+    {
+        for (size_t i = 0; i < count; i++)
+            *(begin + i) = static_cast<T>(first_value + i);
+    })
+)
+
+template <iota_supported_types T>
+void iota(T * begin, size_t count, T first_value)
+{
+#if USE_MULTITARGET_CODE
+    if (isArchSupported(TargetArch::AVX2))
+        return iotaImplAVX2(begin, count, first_value);
+
+    if (isArchSupported(TargetArch::SSE42))
+        return iotaImplSSE42(begin, count, first_value);
+#endif
+    return iotaImpl(begin, count, first_value);
+}
+
+template void iota(UInt8 * begin, size_t count, UInt8 first_value);
+template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 template void iota(UInt64 * begin, size_t count, UInt64 first_value);
 }
diff --git a/src/Common/iota.h b/src/Common/iota.h
index d992032b77c..485df4bd4f0 100644
--- a/src/Common/iota.h
+++ b/src/Common/iota.h
@@ -1,9 +1,7 @@
 #pragma once
 
-#include <base/defines.h>
 #include <base/types.h>
 #include <Common/Concepts.h>
-#include <Common/TargetSpecific.h>
 
 /// This is a replacement for std::iota to use dynamic dispatch
 /// Note that is only defined for containers with contiguous memory only
@@ -13,30 +11,12 @@ namespace DB
 
 /// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
 template <typename T>
-concept iota_supported_types = (is_any_of<T, UInt64>);
+concept iota_supported_types = (is_any_of<T, UInt8, UInt32, UInt64>);
 
-MULTITARGET_FUNCTION_AVX2_SSE42(
-    MULTITARGET_FUNCTION_HEADER(template <iota_supported_types T> void NO_INLINE),
-    iotaImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value) /// NOLINT
-    {
-        for (size_t i = 0; i < count; i++)
-            *(begin + i) = first_value + i;
-    })
-)
-
-template <iota_supported_types T>
-void iota(T * begin, size_t count, T first_value)
-{
-#if USE_MULTITARGET_CODE
-    if (isArchSupported(TargetArch::AVX2))
-        return iotaImplAVX2(begin, count, first_value);
-
-    if (isArchSupported(TargetArch::SSE42))
-        return iotaImplSSE42(begin, count, first_value);
-#endif
-    return iotaImpl(begin, count, first_value);
-}
+template <iota_supported_types T> void iota(T * begin, size_t count, T first_value);
 
+extern template void iota(UInt8 * begin, size_t count, UInt8 first_value);
+extern template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
 
 }
diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp
index 72941126cfd..ab7c3872170 100644
--- a/src/Common/tests/gtest_hash_table.cpp
+++ b/src/Common/tests/gtest_hash_table.cpp
@@ -6,6 +6,7 @@
 #include <Common/HashTable/HashMap.h>
 #include <Common/HashTable/HashSet.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/iota.h>
 
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteHelpers.h>
@@ -20,7 +21,7 @@ namespace
 std::vector<UInt64> getVectorWithNumbersUpToN(size_t n)
 {
     std::vector<UInt64> res(n);
-    std::iota(res.begin(), res.end(), 0);
+    iota(res.data(), res.size(), size_t(0));
     return res;
 }
 
diff --git a/src/Dictionaries/HashedDictionaryParallelLoader.h b/src/Dictionaries/HashedDictionaryParallelLoader.h
index 907a987555e..ec892af7e36 100644
--- a/src/Dictionaries/HashedDictionaryParallelLoader.h
+++ b/src/Dictionaries/HashedDictionaryParallelLoader.h
@@ -2,6 +2,7 @@
 
 #include <Dictionaries/IDictionary.h>
 #include <Common/CurrentThread.h>
+#include <Common/iota.h>
 #include <Common/scope_guard_safe.h>
 #include <Common/ConcurrentBoundedQueue.h>
 #include <Common/ThreadPool.h>
@@ -53,7 +54,7 @@ public:
         LOG_TRACE(dictionary.log, "Will load the dictionary using {} threads (with {} backlog)", shards, backlog);
 
         shards_slots.resize(shards);
-        std::iota(shards_slots.begin(), shards_slots.end(), 0);
+        iota(shards_slots.data(), shards_slots.size(), UInt64(0));
 
         for (size_t shard = 0; shard < shards; ++shard)
         {
diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp
index df3ae439b00..6f800bd921d 100644
--- a/src/Dictionaries/PolygonDictionary.cpp
+++ b/src/Dictionaries/PolygonDictionary.cpp
@@ -5,6 +5,7 @@
 
 #include <base/sort.h>
 
+#include <Common/iota.h>
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnTuple.h>
 #include <DataTypes/DataTypeArray.h>
@@ -507,7 +508,7 @@ const IColumn * unrollSimplePolygons(const ColumnPtr & column, Offset & offset)
     if (!ptr_polygons)
         throw Exception(ErrorCodes::TYPE_MISMATCH, "Expected a column containing arrays of points");
     offset.ring_offsets.assign(ptr_polygons->getOffsets());
-    std::iota(offset.polygon_offsets.begin(), offset.polygon_offsets.end(), 1);
+    iota<IColumn::Offsets::value_type>(offset.polygon_offsets.data(), offset.polygon_offsets.size(), IColumn::Offsets::value_type(1));
     offset.multi_polygon_offsets.assign(offset.polygon_offsets);
 
     return ptr_polygons->getDataPtr().get();
diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h
index 0238ef0b2b9..63d97e9dabd 100644
--- a/src/Dictionaries/PolygonDictionaryUtils.h
+++ b/src/Dictionaries/PolygonDictionaryUtils.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <base/types.h>
+#include <Common/iota.h>
 #include <Common/ThreadPool.h>
 #include <Poco/Logger.h>
 
@@ -184,7 +185,7 @@ public:
     {
         setBoundingBox();
         std::vector<size_t> order(polygons.size());
-        std::iota(order.begin(), order.end(), 0);
+        iota(order.data(), order.size(), size_t(0));
         root = makeCell(min_x, min_y, max_x, max_y, order);
     }
 
diff --git a/src/Functions/array/arrayRandomSample.cpp b/src/Functions/array/arrayRandomSample.cpp
index 1e28e089a2a..40344efb077 100644
--- a/src/Functions/array/arrayRandomSample.cpp
+++ b/src/Functions/array/arrayRandomSample.cpp
@@ -1,5 +1,6 @@
 #include <Columns/ColumnArray.h>
 #include <Columns/ColumnsNumber.h>
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Functions/FunctionFactory.h>
@@ -80,7 +81,7 @@ public:
             const size_t cur_samples = std::min(num_elements, samples);
 
             indices.resize(num_elements);
-            std::iota(indices.begin(), indices.end(), prev_array_offset);
+            iota(indices.data(), indices.size(), prev_array_offset);
             std::shuffle(indices.begin(), indices.end(), rng);
 
             for (UInt64 i = 0; i < cur_samples; i++)
diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp
index faa5ae47b29..10cb51d27d2 100644
--- a/src/Functions/array/arrayShuffle.cpp
+++ b/src/Functions/array/arrayShuffle.cpp
@@ -7,6 +7,7 @@
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
 #include <Common/assert_cast.h>
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <Common/shuffle.h>
 #include <Common/typeid_cast.h>
@@ -150,7 +151,7 @@ ColumnPtr FunctionArrayShuffleImpl<Traits>::executeGeneric(const ColumnArray & a
     size_t size = offsets.size();
     size_t nested_size = array.getData().size();
     IColumn::Permutation permutation(nested_size);
-    std::iota(std::begin(permutation), std::end(permutation), 0);
+    iota(permutation.data(), permutation.size(), IColumn::Permutation::value_type(0));
 
     ColumnArray::Offset current_offset = 0;
     for (size_t i = 0; i < size; ++i)
diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp
index 836cb4de2f3..ad5be7d9dfd 100644
--- a/src/Functions/translate.cpp
+++ b/src/Functions/translate.cpp
@@ -3,6 +3,7 @@
 #include <Columns/ColumnConst.h>
 #include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
+#include <Common/iota.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/UTF8Helpers.h>
 #include <Common/HashTable/HashMap.h>
@@ -31,7 +32,7 @@ struct TranslateImpl
         if (map_from.size() != map_to.size())
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length");
 
-        std::iota(map.begin(), map.end(), 0);
+        iota(map.data(), map.size(), UInt8(0));
 
         for (size_t i = 0; i < map_from.size(); ++i)
         {
@@ -129,7 +130,7 @@ struct TranslateUTF8Impl
         if (map_from_size != map_to_size)
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length");
 
-        std::iota(map_ascii.begin(), map_ascii.end(), 0);
+        iota(map_ascii.data(), map_ascii.size(), UInt32(0));
 
         const UInt8 * map_from_ptr = reinterpret_cast<const UInt8 *>(map_from.data());
         const UInt8 * map_from_end = map_from_ptr + map_from.size();
diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
index 1005e6090b8..3e061db4f56 100644
--- a/src/Interpreters/tests/gtest_filecache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -11,6 +11,7 @@
 #include <memory>
 #include <thread>
 
+#include <Common/iota.h>
 #include <Common/randomSeed.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <IO/ReadHelpers.h>
@@ -788,7 +789,7 @@ TEST_F(FileCacheTest, writeBuffer)
 
         /// get random permutation of indexes
         std::vector<size_t> indexes(data.size());
-        std::iota(indexes.begin(), indexes.end(), 0);
+        iota(indexes.data(), indexes.size(), size_t(0));
         std::shuffle(indexes.begin(), indexes.end(), rng);
 
         for (auto i : indexes)

From bfc10bd234f2791fd48d30437e76df7d4a304a44 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Fri, 29 Dec 2023 15:16:12 +0100
Subject: [PATCH 030/105] an option to avoid waiting for inactive Replicated db
 replicas

---
 docs/en/operations/settings/settings.md       |  2 +
 src/Core/SettingsEnums.cpp                    |  2 +
 src/Core/SettingsEnums.h                      |  2 +
 src/Interpreters/executeDDLQueryOnCluster.cpp | 98 ++++++++++++++-----
 .../test_replicated_database/test.py          |  2 +-
 .../test.py                                   |  2 +-
 ...distributed_ddl_output_mode_long.reference |  4 +-
 .../02447_drop_database_replica.reference     |  8 ++
 .../02447_drop_database_replica.sh            |  3 +
 9 files changed, 96 insertions(+), 27 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 6e087467bb9..d4ee8106320 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3847,6 +3847,8 @@ Possible values:
 - `none` — Is similar to throw, but distributed DDL query returns no result set.
 - `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
 - `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
+- `null_status_on_timeout_only_active` — Similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database.
+- `throw_only_active` — Similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database.
 
 Default value: `throw`.
 
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp
index c35e69977ed..2e6bb51176d 100644
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@@ -113,6 +113,8 @@ IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS,
     {{"none",         DistributedDDLOutputMode::NONE},
      {"throw",    DistributedDDLOutputMode::THROW},
      {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
+     {"throw_only_active", DistributedDDLOutputMode::THROW_ONLY_ACTIVE},
+     {"null_status_on_timeout_only_active", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE},
      {"never_throw", DistributedDDLOutputMode::NEVER_THROW}})
 
 IMPLEMENT_SETTING_ENUM(StreamingHandleErrorMode, ErrorCodes::BAD_ARGUMENTS,
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 2e71c96b954..0b2d47210a8 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -165,6 +165,8 @@ enum class DistributedDDLOutputMode
     THROW,
     NULL_STATUS_ON_TIMEOUT,
     NEVER_THROW,
+    THROW_ONLY_ACTIVE,
+    NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE,
 };
 
 DECLARE_SETTING_ENUM(DistributedDDLOutputMode)
diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index 9486350a0f6..ba7638cd83f 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -200,8 +200,6 @@ public:
     Status prepare() override;
 
 private:
-    static Strings getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path);
-
     static Block getSampleBlock(ContextPtr context_, bool hosts_to_wait);
 
     Strings getNewAndUpdate(const Strings & current_list_of_finished_hosts);
@@ -228,7 +226,8 @@ private:
     NameSet waiting_hosts;  /// hosts from task host list
     NameSet finished_hosts; /// finished hosts from host list
     NameSet ignoring_hosts; /// appeared hosts that are not in hosts list
-    Strings current_active_hosts; /// Hosts that were in active state at the last check
+    Strings current_active_hosts; /// Hosts that are currently executing the task
+    NameSet offline_hosts;  /// Hosts that are not currently running
     size_t num_hosts_finished = 0;
 
     /// Save the first detected error and throw it at the end of execution
@@ -237,7 +236,10 @@ private:
     Int64 timeout_seconds = 120;
     bool is_replicated_database = false;
     bool throw_on_timeout = true;
+    bool only_running_hosts = false;
+
     bool timeout_exceeded = false;
+    bool stop_waiting_offline_hosts = false;
 };
 
 
@@ -316,6 +318,8 @@ DDLQueryStatusSource::DDLQueryStatusSource(
     {
         waiting_hosts = NameSet(hosts_to_wait->begin(), hosts_to_wait->end());
         is_replicated_database = true;
+        only_running_hosts = output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE ||
+                            output_mode == DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT_ONLY_ACTIVE;
     }
     else
     {
@@ -377,6 +381,38 @@ Chunk DDLQueryStatusSource::generateChunkWithUnfinishedHosts() const
     return Chunk(std::move(columns), unfinished_hosts.size());
 }
 
+static NameSet getOfflineHosts(const String & node_path, const NameSet & hosts_to_wait, const ZooKeeperPtr & zookeeper, Poco::Logger * log)
+{
+    fs::path replicas_path;
+    if (node_path.ends_with('/'))
+        replicas_path = fs::path(node_path).parent_path().parent_path().parent_path() / "replicas";
+    else
+        replicas_path = fs::path(node_path).parent_path().parent_path() / "replicas";
+
+    Strings paths;
+    Strings hosts_array;
+    for (const auto & host : hosts_to_wait)
+    {
+        hosts_array.push_back(host);
+        paths.push_back(replicas_path / host / "active");
+    }
+
+    NameSet offline;
+    auto res = zookeeper->tryGet(paths);
+    for (size_t i = 0; i < res.size(); ++i)
+        if (res[i].error == Coordination::Error::ZNONODE)
+            offline.insert(hosts_array[i]);
+
+    if (offline.size() == hosts_to_wait.size())
+    {
+        /// Avoid reporting that all hosts are offline
+        LOG_WARNING(log, "Did not find active hosts, will wait for all {} hosts. This should not happen often", offline.size());
+        return {};
+    }
+
+    return offline;
+}
+
 Chunk DDLQueryStatusSource::generate()
 {
     bool all_hosts_finished = num_hosts_finished >= waiting_hosts.size();
@@ -398,7 +434,7 @@ Chunk DDLQueryStatusSource::generate()
         if (isCancelled())
             return {};
 
-        if (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds)
+        if (stop_waiting_offline_hosts || (timeout_seconds >= 0 && watch.elapsedSeconds() > timeout_seconds))
         {
             timeout_exceeded = true;
 
@@ -406,7 +442,7 @@ Chunk DDLQueryStatusSource::generate()
             size_t num_active_hosts = current_active_hosts.size();
 
             constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. "
-                                                "There are {} unfinished hosts ({} of them are currently active), "
+                                                "There are {} unfinished hosts ({} of them are currently executing the task), "
                                                 "they are going to execute the query in background";
             if (throw_on_timeout)
             {
@@ -425,10 +461,7 @@ Chunk DDLQueryStatusSource::generate()
             return generateChunkWithUnfinishedHosts();
         }
 
-        if (num_hosts_finished != 0 || try_number != 0)
-        {
-            sleepForMilliseconds(std::min<size_t>(1000, 50 * (try_number + 1)));
-        }
+        sleepForMilliseconds(std::min<size_t>(1000, 50 * try_number));
 
         bool node_exists = false;
         Strings tmp_hosts;
@@ -440,9 +473,21 @@ Chunk DDLQueryStatusSource::generate()
             retries_ctl.retryLoop([&]()
             {
                 auto zookeeper = context->getZooKeeper();
-                node_exists = zookeeper->exists(node_path);
-                tmp_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / node_to_wait);
-                tmp_active_hosts = getChildrenAllowNoNode(zookeeper, fs::path(node_path) / "active");
+                Strings paths = {String(fs::path(node_path) / node_to_wait), String(fs::path(node_path) / "active")};
+                auto res = zookeeper->tryGetChildren(paths);
+                for (size_t i = 0; i < res.size(); ++i)
+                    if (res[i].error != Coordination::Error::ZOK && res[i].error != Coordination::Error::ZNONODE)
+                        throw Coordination::Exception::fromPath(res[i].error, paths[i]);
+
+                if (res[0].error == Coordination::Error::ZNONODE)
+                    node_exists = zookeeper->exists(node_path);
+                else
+                    node_exists = true;
+                tmp_hosts = res[0].names;
+                tmp_active_hosts = res[1].names;
+
+                if (only_running_hosts)
+                    offline_hosts = getOfflineHosts(node_path, waiting_hosts, zookeeper, log);
             });
         }
 
@@ -460,6 +505,17 @@ Chunk DDLQueryStatusSource::generate()
 
         Strings new_hosts = getNewAndUpdate(tmp_hosts);
         ++try_number;
+
+        if (only_running_hosts)
+        {
+            size_t num_finished_or_offline = 0;
+            for (const auto & host : waiting_hosts)
+                num_finished_or_offline += finished_hosts.contains(host) || offline_hosts.contains(host);
+
+            if (num_finished_or_offline == waiting_hosts.size())
+                stop_waiting_offline_hosts = true;
+        }
+
         if (new_hosts.empty())
             continue;
 
@@ -470,7 +526,13 @@ Chunk DDLQueryStatusSource::generate()
         {
             ExecutionStatus status(-1, "Cannot obtain error message");
 
-            if (node_to_wait == "finished")
+            /// Replicated database retries in case of error, it should not write error status.
+#ifdef ABORT_ON_LOGICAL_ERROR
+            bool need_check_status = true;
+#else
+            bool need_check_status = !is_replicated_database;
+#endif
+            if (need_check_status)
             {
                 String status_data;
                 bool finished_exists = false;
@@ -496,7 +558,6 @@ Chunk DDLQueryStatusSource::generate()
             if (status.code != 0 && !first_exception
                 && context->getSettingsRef().distributed_ddl_output_mode != DistributedDDLOutputMode::NEVER_THROW)
             {
-                /// Replicated database retries in case of error, it should not write error status.
                 if (is_replicated_database)
                     throw Exception(ErrorCodes::LOGICAL_ERROR, "There was an error on {}: {} (probably it's a bug)", host_id, status.message);
 
@@ -555,15 +616,6 @@ IProcessor::Status DDLQueryStatusSource::prepare()
         return ISource::prepare();
 }
 
-Strings DDLQueryStatusSource::getChildrenAllowNoNode(const std::shared_ptr<zkutil::ZooKeeper> & zookeeper, const String & node_path)
-{
-    Strings res;
-    Coordination::Error code = zookeeper->tryGetChildren(node_path, res);
-    if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE)
-        throw Coordination::Exception::fromPath(code, node_path);
-    return res;
-}
-
 Strings DDLQueryStatusSource::getNewAndUpdate(const Strings & current_list_of_finished_hosts)
 {
     Strings diff;
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index 3ced82ebb57..1fc3fe37044 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -507,7 +507,7 @@ def test_alters_from_different_replicas(started_cluster):
 
     settings = {"distributed_ddl_task_timeout": 5}
     assert (
-        "There are 1 unfinished hosts (0 of them are currently active)"
+        "There are 1 unfinished hosts (0 of them are currently executing the task"
         in competing_node.query_and_get_error(
             "ALTER TABLE alters_from_different_replicas.concurrent_test ADD COLUMN Added0 UInt32;",
             settings=settings,
diff --git a/tests/integration/test_replicated_database_cluster_groups/test.py b/tests/integration/test_replicated_database_cluster_groups/test.py
index b14581c1fe6..647626d8014 100644
--- a/tests/integration/test_replicated_database_cluster_groups/test.py
+++ b/tests/integration/test_replicated_database_cluster_groups/test.py
@@ -96,7 +96,7 @@ def test_cluster_groups(started_cluster):
     main_node_2.stop_clickhouse()
     settings = {"distributed_ddl_task_timeout": 5}
     assert (
-        "There are 1 unfinished hosts (0 of them are currently active)"
+        "There are 1 unfinished hosts (0 of them are currently executing the task)"
         in main_node_1.query_and_get_error(
             "CREATE TABLE cluster_groups.table_2 (d Date, k UInt64) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);",
             settings=settings,
diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
index 39979a98bde..b9a66a1e1a9 100644
--- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
+++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.reference
@@ -3,7 +3,7 @@ Received exception from server:
 Code: 57. Error: Received from localhost:9000. Error: There was an error on [localhost:9000]: Code: 57. Error: Table default.none already exists. (TABLE_ALREADY_EXISTS)
 (query: create table none on cluster test_shard_localhost (n int) engine=Memory;)
 Received exception from server:
-Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
+Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
 (query: drop table if exists none on cluster test_unavailable_shard;)
 throw
 localhost	9000	0		0	0
@@ -12,7 +12,7 @@ Code: 57. Error: Received from localhost:9000. Error: There was an error on [loc
 (query: create table throw on cluster test_shard_localhost (n int) engine=Memory format Null;)
 localhost	9000	0		1	0
 Received exception from server:
-Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently active), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
+Code: 159. Error: Received from localhost:9000. Error: Watching task <task> is executing longer than distributed_ddl_task_timeout (=1) seconds. There are 1 unfinished hosts (0 of them are currently executing the task), they are going to execute the query in background. (TIMEOUT_EXCEEDED)
 (query: drop table if exists throw on cluster test_unavailable_shard;)
 null_status_on_timeout
 localhost	9000	0		0	0
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index f2b41569540..8ad9008057f 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -12,10 +12,18 @@ t
 2
 rdb_default	1	1	s1	r1	1
 2
+s1	r1	OK	2	0
+s2	r1	QUEUED	2	0
+s1	r2	QUEUED	2	0
+s1	r1	OK	2	0
+s2	r1	QUEUED	2	0
+s1	r2	QUEUED	2	0
 2
 rdb_default	1	1	s1	r1	1
 rdb_default	1	2	s1	r2	0
 2
 2
 t
+t2
+t3
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index d5b3ceef46a..388af3fad74 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -32,6 +32,9 @@ $CLICKHOUSE_CLIENT -q "system sync database replica $db"
 $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1"
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
+$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log"
+$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log"
+
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"
 $CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null

From c7fa93d704805a0432428cb59ce3cf85d2f77f1b Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 14:38:59 +0100
Subject: [PATCH 031/105] Add infrastructure for testing replicated MergeTree
 queue

- replicated_queue_fail_next_entry - to fail next queue entry
- replicated_queue_unfail_entries - to "unfail" all queue entries (if
  any)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Common/ErrorCodes.cpp                     |  1 +
 src/Common/FailPoint.cpp                      |  2 ++
 .../MergeTree/ReplicatedMergeTreeLogEntry.h   |  3 +++
 src/Storages/StorageReplicatedMergeTree.cpp   | 19 +++++++++++++++++++
 4 files changed, 25 insertions(+)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 9222a27afdf..577a83e40b9 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -589,6 +589,7 @@
     M(707, GCP_ERROR) \
     M(708, ILLEGAL_STATISTIC) \
     M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
+    M(710, FAULT_INJECTED) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp
index 9665788dac2..f29aee0cdcc 100644
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@@ -34,6 +34,8 @@ static struct InitFiu
 
 #define APPLY_FOR_FAILPOINTS(ONCE, REGULAR, PAUSEABLE_ONCE, PAUSEABLE) \
     ONCE(replicated_merge_tree_commit_zk_fail_after_op) \
+    ONCE(replicated_queue_fail_next_entry) \
+    REGULAR(replicated_queue_unfail_entries) \
     ONCE(replicated_merge_tree_insert_quorum_fail_0) \
     REGULAR(replicated_merge_tree_commit_zk_fail_when_recovering_from_hw_fault) \
     REGULAR(use_delayed_remote_source) \
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
index 4821a80a29b..b3ab3d75dcb 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
@@ -171,6 +171,9 @@ struct ReplicatedMergeTreeLogEntryData
     /// The quorum value (for GET_PART) is a non-zero value when the quorum write is enabled.
     size_t quorum = 0;
 
+    /// Used only in tests for permanent fault injection for a particular queue entry.
+    bool fault_injected = false;
+
     /// If this MUTATE_PART entry caused by alter(modify/drop) query.
     bool isAlterMutation() const
     {
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index f143a2ec78b..a68294d3dce 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -18,6 +18,7 @@
 #include <Common/thread_local_rng.h>
 #include <Common/typeid_cast.h>
 #include <Common/ThreadFuzzer.h>
+#include <Common/FailPoint.h>
 
 #include <Core/ServerUUID.h>
 
@@ -147,6 +148,12 @@ namespace CurrentMetrics
 namespace DB
 {
 
+namespace FailPoints
+{
+    extern const char replicated_queue_fail_next_entry[];
+    extern const char replicated_queue_unfail_entries[];
+}
+
 namespace ErrorCodes
 {
     extern const int CANNOT_READ_ALL_DATA;
@@ -191,6 +198,7 @@ namespace ErrorCodes
     extern const int TABLE_IS_DROPPED;
     extern const int CANNOT_BACKUP_TABLE;
     extern const int SUPPORT_IS_DISABLED;
+    extern const int FAULT_INJECTED;
 }
 
 namespace ActionLocks
@@ -1931,6 +1939,17 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo
 
 bool StorageReplicatedMergeTree::executeLogEntry(LogEntry & entry)
 {
+    fiu_do_on(FailPoints::replicated_queue_fail_next_entry,
+    {
+        entry.fault_injected = true;
+    });
+    fiu_do_on(FailPoints::replicated_queue_unfail_entries,
+    {
+        entry.fault_injected = false;
+    });
+    if (entry.fault_injected)
+        throw Exception(ErrorCodes::FAULT_INJECTED, "Injecting fault for log entry {}", entry.getDescriptionForLogs(format_version));
+
     if (entry.type == LogEntry::DROP_RANGE || entry.type == LogEntry::DROP_PART)
     {
         executeDropRange(entry);

From 7efe41357598d007f74d7ecc8b61bebbd1a6cb18 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 17:04:12 +0100
Subject: [PATCH 032/105] Add a test for ALTER_METADATA vs MERGE_PARTS race
 (CHECKSUM_DOESNT_MATCH)

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 ...metadata_merge_checksum_mismatch.reference |  1 +
 ..._alter_metadata_merge_checksum_mismatch.sh | 98 +++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
 create mode 100755 tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh

diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
new file mode 100644
index 00000000000..0045aab2e30
--- /dev/null
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
@@ -0,0 +1 @@
+all_0_2_2_1	RegularMerge	MergeParts	CHECKSUM_DOESNT_MATCH
diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
new file mode 100755
index 00000000000..20cffcd9f65
--- /dev/null
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+# Tags: no-parallel
+# Tag no-parallel: failpoint is in use
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+set -e
+
+function wait_part()
+{
+    local table=$1 && shift
+    local part=$1 && shift
+
+    for ((i = 0; i < 100; ++i)); do
+        if [[ $($CLICKHOUSE_CLIENT -q "select count() from system.parts where database = '$CLICKHOUSE_DATABASE' and table = '$table' and active and name = '$part'") -eq 1 ]]; then
+            return
+        fi
+        sleep 0.1
+    done
+
+    echo "Part $table::$part does not appeared" >&2
+}
+
+function restore_failpoints()
+{
+    # restore entry error with failpoints (to avoid endless errors in logs)
+    $CLICKHOUSE_CLIENT -nm -q "
+        system enable failpoint replicated_queue_unfail_entries;
+        system sync replica $failed_replica;
+        system disable failpoint replicated_queue_unfail_entries;
+    "
+}
+trap restore_failpoints EXIT
+
+$CLICKHOUSE_CLIENT -nm -q "
+    drop table if exists data_r1;
+    drop table if exists data_r2;
+
+    create table data_r1 (key Int, value Int, index value_idx value type minmax) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/data', '{table}') order by key;
+    create table data_r2 (key Int, value Int, index value_idx value type minmax) engine=ReplicatedMergeTree('/clickhouse/tables/{database}/data', '{table}') order by key;
+
+    insert into data_r1 (key) values (1); -- part all_0_0_0
+"
+
+# will fail ALTER_METADATA on one of replicas
+$CLICKHOUSE_CLIENT -nm -q "
+    system enable failpoint replicated_queue_fail_next_entry;
+    alter table data_r1 drop index value_idx settings alter_sync=0; -- part all_0_0_0_1
+
+    system sync replica data_r1 pull;
+    system sync replica data_r2 pull;
+"
+
+# replica on which ALTER_METADATA has succeeded
+success_replica=
+for ((i = 0; i < 100; ++i)); do
+    for table in data_r1 data_r2; do
+        mutations="$($CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = '$CLICKHOUSE_DATABASE' and table = '$table' and is_done = 0")"
+        if [[ $mutations -eq 0 ]]; then
+            success_replica=$table
+        fi
+    done
+    if [[ -n $success_replica ]]; then
+        break
+    fi
+    sleep 0.1
+done
+case "$success_replica" in
+    data_r1) failed_replica=data_r2;;
+    data_r2) failed_replica=data_r1;;
+    *) echo "ALTER_METADATA does not succeed on any replica" >&2 && exit 1;;
+esac
+mutations_on_failed_replica="$($CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = '$CLICKHOUSE_DATABASE' and table = '$failed_replica' and is_done = 0")"
+if [[ $mutations_on_failed_replica != 1 ]]; then
+    echo "Wrong number of mutations on failed replica $failed_replica, mutations $mutations_on_failed_replica" >&2
+fi
+
+# This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge)
+$CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1
+
+$CLICKHOUSE_CLIENT -nm -q "
+    insert into $success_replica (key) values (2); -- part all_2_2_0
+    optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1
+    system sync replica $failed_replica pull;
+"
+
+# Wait for part to be merged on failed replica, that will trigger CHECKSUM_DOESNT_MATCH
+wait_part "$failed_replica" all_0_2_2_1
+
+# Once the part has been fetched there will already be a CHECKSUM_DOESNT_MATCH error in case of ALTER_METADATA re-order, but let's restore fail points and sync the failed replica first.
+restore_failpoints
+trap '' EXIT
+
+$CLICKHOUSE_CLIENT -q "system flush logs"
+# check for error "Different number of files: 5 compressed (expected 3) and 2 uncompressed ones (expected 2). (CHECKSUM_DOESNT_MATCH)"
+$CLICKHOUSE_CLIENT -q "select part_name, merge_reason, event_type, errorCodeToName(error) from system.part_log where database = '$CLICKHOUSE_DATABASE' and error != 0 order by event_time_microseconds"

From a12df35be4c6954e683dbea53c00599ca6a96d5d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 11 Dec 2023 17:47:22 +0100
Subject: [PATCH 033/105] Eliminate possible race between ALTER_METADATA and
 MERGE_PARTS

v2: move metadata version check after checking that the part is not covering part
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 16 ++++++++++++++--
 src/Storages/StorageReplicatedMergeTree.cpp      |  6 ++----
 ...er_metadata_merge_checksum_mismatch.reference |  1 -
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
index 9be31859a19..3f0b8c8b247 100644
--- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
@@ -43,6 +43,8 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
     LOG_TRACE(log, "Executing log entry to merge parts {} to {}",
         fmt::join(entry.source_parts, ", "), entry.new_part_name);
 
+    StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr();
+    int32_t metadata_version = metadata_snapshot->getMetadataVersion();
     const auto storage_settings_ptr = storage.getSettings();
 
     if (storage_settings_ptr->always_fetch_merged_part)
@@ -129,6 +131,18 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
             };
         }
 
+        int32_t part_metadata_version = source_part_or_covering->getMetadataVersion();
+        if (part_metadata_version > metadata_version)
+        {
+            LOG_DEBUG(log, "Source part metadata version {} is newer then the table metadata version {}. ALTER_METADATA is still in progress.",
+                part_metadata_version, metadata_version);
+            return PrepareResult{
+                .prepared_successfully = false,
+                .need_to_check_missing_part_in_fetch = false,
+                .part_log_writer = {}
+            };
+        }
+
         parts.push_back(source_part_or_covering);
     }
 
@@ -176,8 +190,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
     /// It will live until the whole task is being destroyed
     table_lock_holder = storage.lockForShare(RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations);
 
-    StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr();
-
     auto future_merged_part = std::make_shared<FutureMergedMutatedPart>(parts, entry.new_part_format);
     if (future_merged_part->name != entry.new_part_name)
     {
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index a68294d3dce..5233393a11f 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1745,14 +1745,12 @@ bool StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(
 
         if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash())
         {
-            /// Currently there are two (known) cases when it may happen:
+            /// Currently there is only one (known) case when it may happen:
             ///  - KILL MUTATION query had removed mutation before all replicas have executed assigned MUTATE_PART entries.
             ///    Some replicas may skip this mutation and update part version without actually applying any changes.
             ///    It leads to mismatching checksum if changes were applied on other replicas.
-            ///  - ALTER_METADATA and MERGE_PARTS were reordered on some replicas.
-            ///    It may lead to different number of columns in merged parts on these replicas.
             throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Part {} from {} has different columns hash "
-                            "(it may rarely happen on race condition with KILL MUTATION or ALTER COLUMN).", part_name, replica);
+                            "(it may rarely happen on race condition with KILL MUTATION).", part_name, replica);
         }
 
         replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
index 0045aab2e30..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.reference
@@ -1 +0,0 @@
-all_0_2_2_1	RegularMerge	MergeParts	CHECKSUM_DOESNT_MATCH

From 5521e5d9b16a7527d81cf97742c548570769d143 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 29 Dec 2023 15:58:01 +0000
Subject: [PATCH 034/105] Refactor StorageHDFS and StorageFile virtual columns
 filtering

---
 src/Storages/HDFS/StorageHDFS.cpp   | 185 ++++++++++++++++++++++++----
 src/Storages/HDFS/StorageHDFS.h     |  12 +-
 src/Storages/StorageFile.cpp        | 158 ++++++++++++++++++++----
 src/Storages/StorageFile.h          |  16 ++-
 src/Storages/VirtualColumnUtils.cpp |  36 ++++++
 src/Storages/VirtualColumnUtils.h   |  19 +++
 6 files changed, 369 insertions(+), 57 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index fdbb5e9f171..9d719413c8d 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -15,6 +15,8 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <IO/WriteHelpers.h>
 #include <IO/CompressionMethod.h>
@@ -408,6 +410,35 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
 class HDFSSource::DisclosedGlobIterator::Impl
 {
 public:
+
+    Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    {
+        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
+        uris = getPathsList(path_from_uri, uri_without_path, context);
+        ActionsDAGPtr filter_dag;
+        if (!uris.empty())
+             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, uris[0].path);
+
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(uris.size());
+            for (const auto & path_with_info : uris)
+                paths.push_back(path_with_info.path);
+
+            VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context);
+        }
+        auto file_progress_callback = context->getFileProgressCallback();
+
+        for (auto & elem : uris)
+        {
+            elem.path = uri_without_path + elem.path;
+            if (file_progress_callback && elem.info)
+                file_progress_callback(FileProgress(0, elem.info->size));
+        }
+        uris_iter = uris.begin();
+    }
+
     Impl(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
@@ -456,21 +487,21 @@ private:
 class HDFSSource::URISIterator::Impl : WithContext
 {
 public:
-    explicit Impl(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context_)
+    explicit Impl(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_)
         : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback())
     {
-        ASTPtr filter_ast;
+        ActionsDAGPtr filter_dag;
         if (!uris.empty())
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first, getContext());
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first);
 
-        if (filter_ast)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(uris.size());
             for (const auto & uri : uris)
                 paths.push_back(getPathFromUriAndUriWithoutPath(uri).first);
 
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, getContext(), filter_ast);
+            VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext());
         }
 
         if (!uris.empty())
@@ -520,13 +551,16 @@ private:
 HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, query, virtual_columns, context)) {}
 
+HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, predicate, virtual_columns, context)) {}
+
 StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next()
 {
     return pimpl->next();
 }
 
-HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, query, virtual_columns, context))
+HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, predicate, virtual_columns, context))
 {
 }
 
@@ -541,8 +575,8 @@ HDFSSource::HDFSSource(
     ContextPtr context_,
     UInt64 max_block_size_,
     std::shared_ptr<IteratorWrapper> file_iterator_,
-    bool need_only_count_,
-    const SelectQueryInfo & query_info_)
+    bool need_only_count_)
+    //const SelectQueryInfo & query_info_)
     : ISource(info.source_header, false)
     , WithContext(context_)
     , storage(std::move(storage_))
@@ -553,7 +587,7 @@ HDFSSource::HDFSSource(
     , file_iterator(file_iterator_)
     , columns_description(info.columns_description)
     , need_only_count(need_only_count_)
-    , query_info(query_info_)
+    //, query_info(query_info_)
 {
     initialize();
 }
@@ -843,7 +877,82 @@ bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const
     return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_);
 }
 
-Pipe StorageHDFS::read(
+class ReadFromHDFS : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromHDFS"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromHDFS(
+        Block sample_block,
+        std::vector<String> uris_,
+        bool distributed_processing_,
+        NamesAndTypesList virtual_columns_,
+        bool is_path_with_globs_,
+        ReadFromFormatInfo info_,
+        bool need_only_count_,
+        std::shared_ptr<StorageHDFS> storage_,
+        // StorageSnapshotPtr storage_snapshot_,
+        // const StorageEmbeddedRocksDB & storage_,
+        // SelectQueryInfo query_info_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , uris(std::move(uris_))
+        , distributed_processing(distributed_processing_)
+        , virtual_columns(std::move(virtual_columns_))
+        , is_path_with_globs(is_path_with_globs_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , storage(std::move(storage_))
+        // , storage_snapshot(std::move(storage_snapshot_))
+        // , storage(storage_)
+        // , query_info(std::move(query_info_))
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::vector<String> uris;
+    const bool distributed_processing;
+    NamesAndTypesList virtual_columns;
+    bool is_path_with_globs;
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+    std::shared_ptr<StorageHDFS> storage;
+
+    // StorageSnapshotPtr storage_snapshot;
+    // const StorageEmbeddedRocksDB & storage;
+    // SelectQueryInfo query_info;
+    ContextPtr context;
+
+    size_t max_block_size;
+    size_t num_streams;
+
+    std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper;
+
+    // FieldVectorPtr keys;
+    // bool all_scan = false;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromHDFS::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createIterator(predicate);
+}
+
+void StorageHDFS::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -852,18 +961,44 @@ Pipe StorageHDFS::read(
     size_t max_block_size,
     size_t num_streams)
 {
-    std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper{nullptr};
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns);
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && context_->getSettingsRef().optimize_count_from_files;
+
+    auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromHDFS>(
+        read_from_format_info.source_header,
+        uris,
+        distributed_processing,
+        virtual_columns,
+        is_path_with_globs,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(this_ptr),
+        context_,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper)
+        return;
+
     if (distributed_processing)
     {
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>(
-            [callback = context_->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
+            [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
                 return StorageHDFS::PathWithInfo{callback(), std::nullopt};
         });
     }
     else if (is_path_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], query_info.query, virtual_columns, context_);
+        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], predicate, virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
         {
             return glob_iterator->next();
@@ -871,31 +1006,31 @@ Pipe StorageHDFS::read(
     }
     else
     {
-        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, query_info.query, virtual_columns, context_);
+        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, predicate, virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
         {
             return uris_iterator->next();
         });
     }
+}
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), getVirtuals());
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && context_->getSettingsRef().optimize_count_from_files;
+void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr);
 
     Pipes pipes;
-    auto this_ptr = std::static_pointer_cast<StorageHDFS>(shared_from_this());
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<HDFSSource>(
-            read_from_format_info,
-            this_ptr,
-            context_,
+            info,
+            storage,
+            context,
             max_block_size,
             iterator_wrapper,
-            need_only_count,
-            query_info));
+            need_only_count)); //,
+            //query_info));
     }
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/)
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 18eeb787d77..cee1b674eb7 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -51,7 +51,8 @@ public:
 
     String getName() const override { return "HDFS"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -115,6 +116,7 @@ public:
     {
         public:
             DisclosedGlobIterator(const String & uri_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+            DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
             class Impl;
@@ -125,7 +127,7 @@ public:
     class URISIterator
     {
         public:
-            URISIterator(const std::vector<String> & uris_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+            URISIterator(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
             class Impl;
@@ -142,8 +144,8 @@ public:
         ContextPtr context_,
         UInt64 max_block_size_,
         std::shared_ptr<IteratorWrapper> file_iterator_,
-        bool need_only_count_,
-        const SelectQueryInfo & query_info_);
+        bool need_only_count_);
+        //const SelectQueryInfo & query_info_);
 
     String getName() const override;
 
@@ -162,7 +164,7 @@ private:
     ColumnsDescription columns_description;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    SelectQueryInfo query_info;
+    //SelectQueryInfo query_info;
 
     std::unique_ptr<ReadBuffer> read_buf;
     std::shared_ptr<IInputFormat> input_format;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 25bb6691ff6..b040f452410 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -9,6 +9,7 @@
 
 #include <Interpreters/Context.h>
 #include <Interpreters/evaluateConstantExpression.h>
+#include <Interpreters/InterpreterSelectQuery.h>
 
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTIdentifier_fwd.h>
@@ -44,6 +45,8 @@
 #include <Common/filesystemHelpers.h>
 #include <Common/logger_useful.h>
 #include <Common/ProfileEvents.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
@@ -947,6 +950,23 @@ StorageFileSource::FilesIterator::FilesIterator(
         VirtualColumnUtils::filterByPathOrFile(files, files, query, virtual_columns, context_, filter_ast);
 }
 
+StorageFileSource::FilesIterator::FilesIterator(
+    const Strings & files_,
+    std::optional<StorageFile::ArchiveInfo> archive_info_,
+    const ActionsDAG::Node * predicate,
+    const NamesAndTypesList & virtual_columns,
+    ContextPtr context_,
+    bool distributed_processing_)
+    : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
+{
+    ActionsDAGPtr filter_dag;
+    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0]);
+
+    if (filter_dag)
+        VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_);
+}
+
 String StorageFileSource::FilesIterator::next()
 {
     if (distributed_processing)
@@ -974,16 +994,13 @@ const String & StorageFileSource::FilesIterator::getFileNameInArchive()
 StorageFileSource::StorageFileSource(
     const ReadFromFormatInfo & info,
     std::shared_ptr<StorageFile> storage_,
-    const StorageSnapshotPtr & storage_snapshot_,
     ContextPtr context_,
-    const SelectQueryInfo & query_info_,
     UInt64 max_block_size_,
     FilesIteratorPtr files_iterator_,
     std::unique_ptr<ReadBuffer> read_buf_,
     bool need_only_count_)
     : SourceWithKeyCondition(info.source_header, false)
     , storage(std::move(storage_))
-    , storage_snapshot(storage_snapshot_)
     , files_iterator(std::move(files_iterator_))
     , read_buf(std::move(read_buf_))
     , columns_description(info.columns_description)
@@ -991,7 +1008,6 @@ StorageFileSource::StorageFileSource(
     , requested_virtual_columns(info.requested_virtual_columns)
     , block_for_format(info.format_header)
     , context(context_)
-    , query_info(query_info_)
     , max_block_size(max_block_size_)
     , need_only_count(need_only_count_)
 {
@@ -1322,14 +1338,87 @@ std::optional<size_t> StorageFileSource::tryGetNumRowsFromCache(const String & p
     return schema_cache.tryGetNumRows(key, get_last_mod_time);
 }
 
-Pipe StorageFile::read(
+class ReadFromFile : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromFile"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromFile(
+        Block sample_block,
+        std::shared_ptr<StorageFile> storage_,
+        std::vector<std::string> paths_,
+        std::optional<StorageFile::ArchiveInfo> archive_info_,
+        NamesAndTypesList virtual_columns_,
+        bool distributed_processing_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        size_t total_bytes_to_read_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , paths(std::move(paths_))
+        , archive_info(std::move(archive_info_))
+        , virtual_columns(std::move(virtual_columns_))
+        , distributed_processing(distributed_processing_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , total_bytes_to_read(total_bytes_to_read_)
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , max_num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageFile> storage;
+
+    std::vector<std::string> paths;
+    std::optional<StorageFile::ArchiveInfo> archive_info;
+
+    NamesAndTypesList virtual_columns;
+    const bool distributed_processing;
+
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+
+    size_t total_bytes_to_read;
+
+    ContextPtr context;
+
+    size_t max_block_size;
+    const size_t max_num_streams;
+
+    std::shared_ptr<StorageFileSource::FilesIterator> files_iterator;
+
+    // FieldVectorPtr keys;
+    // bool all_scan = false;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromFile::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createIterator(predicate);
+}
+
+void StorageFile::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
     ContextPtr context,
     QueryProcessingStage::Enum /*processed_stage*/,
     size_t max_block_size,
-    const size_t max_num_streams)
+    size_t num_streams)
 {
     if (use_table_fd)
     {
@@ -1346,17 +1435,48 @@ Pipe StorageFile::read(
 
         if (p->size() == 1 && !fs::exists(p->at(0)))
         {
-            if (context->getSettingsRef().engine_file_empty_if_not_exists)
-                return Pipe(std::make_shared<NullSource>(storage_snapshot->getSampleBlockForColumns(column_names)));
-            else
+            if (!context->getSettingsRef().engine_file_empty_if_not_exists)
                 throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", p->at(0));
+
+            auto header = storage_snapshot->getSampleBlockForColumns(column_names);
+            InterpreterSelectQuery::addEmptySourceToQueryPlan(query_plan, header, query_info, context);
+            return;
         }
     }
 
-    auto files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, query_info.query, virtual_columns, context, distributed_processing);
-
     auto this_ptr = std::static_pointer_cast<StorageFile>(shared_from_this());
 
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals());
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && context->getSettingsRef().optimize_count_from_files;
+
+    auto reading = std::make_unique<ReadFromFile>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        paths,
+        archive_info,
+        virtual_columns,
+        distributed_processing,
+        std::move(read_from_format_info),
+        need_only_count,
+        total_bytes_to_read,
+        context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (files_iterator)
+        return;
+
+    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, predicate, virtual_columns, context, distributed_processing);
+}
+
+void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
@@ -1377,10 +1497,6 @@ Pipe StorageFile::read(
     if (progress_callback && !archive_info)
         progress_callback(FileProgress(0, total_bytes_to_read));
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context), getVirtuals());
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && context->getSettingsRef().optimize_count_from_files;
-
     for (size_t i = 0; i < num_streams; ++i)
     {
         /// In case of reading from fd we have to check whether we have already created
@@ -1388,22 +1504,20 @@ Pipe StorageFile::read(
         /// If yes, then we should use it in StorageFileSource. Atomic bool flag is needed
         /// to prevent data race in case of parallel reads.
         std::unique_ptr<ReadBuffer> read_buffer;
-        if (has_peekable_read_buffer_from_fd.exchange(false))
-            read_buffer = std::move(peekable_read_buffer_from_fd);
+        if (storage->has_peekable_read_buffer_from_fd.exchange(false))
+            read_buffer = std::move(storage->peekable_read_buffer_from_fd);
 
         pipes.emplace_back(std::make_shared<StorageFileSource>(
-            read_from_format_info,
-            this_ptr,
-            storage_snapshot,
+            info,
+            storage,
             context,
-            query_info,
             max_block_size,
             files_iterator,
             std::move(read_buffer),
             need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 1fd3f2e0edf..ecb9e01b862 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -53,7 +53,8 @@ public:
 
     std::string getName() const override { return "File"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -137,6 +138,7 @@ public:
 protected:
     friend class StorageFileSource;
     friend class StorageFileSink;
+    friend class ReadFromFile;
 
 private:
     void setStorageMetadata(CommonArguments args);
@@ -199,6 +201,14 @@ public:
             ContextPtr context_,
             bool distributed_processing_ = false);
 
+        explicit FilesIterator(
+            const Strings & files_,
+            std::optional<StorageFile::ArchiveInfo> archive_info_,
+            const ActionsDAG::Node * predicate,
+            const NamesAndTypesList & virtual_columns,
+            ContextPtr context_,
+            bool distributed_processing_ = false);
+
         String next();
 
         bool isReadFromArchive() const
@@ -234,9 +244,7 @@ private:
     StorageFileSource(
         const ReadFromFormatInfo & info,
         std::shared_ptr<StorageFile> storage_,
-        const StorageSnapshotPtr & storage_snapshot_,
         ContextPtr context_,
-        const SelectQueryInfo & query_info_,
         UInt64 max_block_size_,
         FilesIteratorPtr files_iterator_,
         std::unique_ptr<ReadBuffer> read_buf_,
@@ -269,7 +277,6 @@ private:
     std::optional<size_t> tryGetNumRowsFromCache(const String & path, time_t last_mod_time) const;
 
     std::shared_ptr<StorageFile> storage;
-    StorageSnapshotPtr storage_snapshot;
     FilesIteratorPtr files_iterator;
     String current_path;
     std::optional<size_t> current_file_size;
@@ -290,7 +297,6 @@ private:
     Block block_for_format;
 
     ContextPtr context;    /// TODO Untangle potential issues with context lifetime.
-    SelectQueryInfo query_info;
     UInt64 max_block_size;
 
     bool finished_generate = false;
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index aed06fb0540..7690e160255 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -390,6 +390,42 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s
     block.getByName("_idx").column->assumeMutableRef().insert(idx);
 }
 
+/// Passes `predicate` and a one-row sample block of the `_path`/`_file`/`_idx` columns to splitFilterDagForAllowedInputs and returns its result.
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example)
+{
+    /// Without a predicate or without any virtual columns there is nothing to build: return an empty pointer.
+    if (virtual_columns.empty() || !predicate)
+        return {};
+
+    Block sample;
+    for (const auto & virtual_column : virtual_columns)
+    {
+        if (virtual_column.name == "_path" || virtual_column.name == "_file")
+            sample.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name});
+    }
+    sample.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"}); /// "_idx" is appended to hold the filter result
+    addPathAndFileToVirtualColumns(sample, path_example, 0); /// the single sample row, built from `path_example`
+    return splitFilterDagForAllowedInputs(predicate, sample);
+}
+
+/// Adds one row per entry of `paths` (its position stored in "_idx"), runs filterBlockWithDAG with `dag`, and returns the "_idx" column.
+ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+{
+    Block candidates;
+    for (const auto & virtual_column : virtual_columns)
+    {
+        if (virtual_column.name == "_path" || virtual_column.name == "_file")
+            candidates.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name});
+    }
+    candidates.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
+
+    size_t position = 0;
+    for (const auto & path : paths)
+        addPathAndFileToVirtualColumns(candidates, path, position++);
+    filterBlockWithDAG(dag, candidates, context); /// NOTE(review): presumably drops the rows rejected by `dag` - confirm against filterBlockWithDAG
+    return candidates.getByName("_idx").column;
+}
+
 ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context)
 {
     if (!query || virtual_columns.empty())
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index e22b9742888..4f9636b4213 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -77,6 +77,25 @@ void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & pa
     sources = std::move(filtered_sources);
 }
 
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example);
+
+ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+
+/// Rebuilds `sources` from the positions that getFilterByPathAndFileIndexes returns for `paths` and `dag`.
+template <typename T>
+void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+{
+    const auto kept_column = getFilterByPathAndFileIndexes(paths, dag, virtual_columns, context);
+    const auto & kept_positions = typeid_cast<const ColumnUInt64 &>(*kept_column).getData();
+    if (kept_positions.size() == sources.size()) /// same number of entries: `sources` is left as is
+        return;
+    std::vector<T> survivors;
+    survivors.reserve(kept_positions.size());
+    for (const auto position : kept_positions)
+        survivors.emplace_back(std::move(sources[position]));
+    sources = std::move(survivors);
+}
+
 void addRequestedPathFileAndSizeVirtualsToChunk(
     Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional<size_t> size, const String * filename = nullptr);
 }

From db97a6998901aeb0a60f2a9cb57bcb98a75881e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 17:00:01 +0100
Subject: [PATCH 035/105] Add perf tests with tuples

---
 tests/performance/agg_functions_min_max_any.xml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml
index 2926a5ed3c8..f8469244643 100644
--- a/tests/performance/agg_functions_min_max_any.xml
+++ b/tests/performance/agg_functions_min_max_any.xml
@@ -87,4 +87,9 @@
 <query>select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
 <query>select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % {group_scale} FORMAT Null</query>
 
+<!-- Test with tuples (useful when you want to keep 2 columns of the same row) -->
+<query>select min((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select max((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select any((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
+<query>select anyHeavy((WatchID, CounterID)) from hits_100m_single FORMAT Null</query>
 </test>

From a38b3b9a7945fcca64f42a230ac9df808790a70a Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 29 Dec 2023 17:02:02 +0100
Subject: [PATCH 036/105] Fix test

---
 ..._materialized_view_with_dropped_target_table_no_exception.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
index 744b2578617..af6dbf24473 100644
--- a/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
+++ b/tests/queries/0_stateless/02932_materialized_view_with_dropped_target_table_no_exception.sql
@@ -1,3 +1,4 @@
+set ignore_materialized_views_with_dropped_target_table = 1;
 drop table if exists from_table;
 drop table if exists to_table;
 drop table if exists mv;

From 4b7fcfbc75d5ffe5d4331f2370d43537e504bc44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:56:08 +0100
Subject: [PATCH 037/105] Use iota in more places

---
 src/AggregateFunctions/QuantilesCommon.h |  4 ++--
 src/Columns/IColumnImpl.h                | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/AggregateFunctions/QuantilesCommon.h b/src/AggregateFunctions/QuantilesCommon.h
index 3dda0119485..afbca84b827 100644
--- a/src/AggregateFunctions/QuantilesCommon.h
+++ b/src/AggregateFunctions/QuantilesCommon.h
@@ -6,6 +6,7 @@
 
 #include <Common/FieldVisitorConvertToNumber.h>
 #include <Common/NaNUtils.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -63,10 +64,9 @@ struct QuantileLevels
 
             if (isNaN(levels[i]) || levels[i] < 0 || levels[i] > 1)
                 throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Quantile level is out of range [0..1]");
-
-            permutation[i] = i;
         }
 
+        iota(permutation.data(), size, Permutation::value_type(0));
         ::sort(permutation.begin(), permutation.end(), [this] (size_t a, size_t b) { return levels[a] < levels[b]; });
     }
 };
diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h
index 0eab9452813..8e0bf0014f2 100644
--- a/src/Columns/IColumnImpl.h
+++ b/src/Columns/IColumnImpl.h
@@ -6,10 +6,11 @@
   * implementation.
   */
 
-#include <Columns/IColumn.h>
-#include <Common/PODArray.h>
-#include <base/sort.h>
 #include <algorithm>
+#include <Columns/IColumn.h>
+#include <base/sort.h>
+#include <Common/PODArray.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -299,8 +300,7 @@ void IColumn::getPermutationImpl(
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, Permutation::value_type(0));
 
     if (limit)
     {

From ed6b9703a1a4848949f6e6f37a241a0cffb17c96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 15:57:36 +0100
Subject: [PATCH 038/105] More iota

---
 src/Columns/ColumnAggregateFunction.cpp        | 18 +++++++++---------
 src/Columns/ColumnConst.cpp                    |  8 ++++----
 src/Columns/ColumnDecimal.cpp                  | 15 +++++++--------
 src/Columns/ColumnSparse.cpp                   | 14 +++++++-------
 src/Columns/ColumnTuple.cpp                    | 12 ++++++------
 src/Columns/ColumnVector.cpp                   | 18 +++++++++---------
 src/Columns/IColumnDummy.cpp                   | 10 +++++-----
 .../tests/gtest_column_stable_permutation.cpp  | 10 +++-------
 src/Common/levenshteinDistance.cpp             |  6 +++---
 src/Functions/FunctionsStringDistance.cpp      |  4 ++--
 src/Functions/array/arraySort.cpp              |  7 +++----
 src/Functions/rowNumberInBlock.cpp             |  3 +--
 src/Interpreters/sortBlock.cpp                 |  4 ++--
 .../Transforms/PartialSortingTransform.cpp     |  9 ++++-----
 src/QueryPipeline/QueryPipelineBuilder.cpp     | 14 +++++++-------
 15 files changed, 72 insertions(+), 80 deletions(-)

diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 0ec5db6c69d..2018015b46d 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -1,18 +1,19 @@
 #include <Columns/ColumnAggregateFunction.h>
 #include <Columns/ColumnsCommon.h>
 #include <Columns/MaskOperations.h>
-#include <Common/assert_cast.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <IO/Operators.h>
 #include <IO/WriteBufferFromArena.h>
 #include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
-#include <Common/FieldVisitorToString.h>
-#include <Common/SipHash.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
 #include <Common/AlignedBuffer.h>
-#include <Common/typeid_cast.h>
 #include <Common/Arena.h>
-#include <Common/WeakHash.h>
+#include <Common/FieldVisitorToString.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/assert_cast.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 
 namespace DB
@@ -626,8 +627,7 @@ void ColumnAggregateFunction::getPermutation(PermutationSortDirection /*directio
 {
     size_t s = data.size();
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 void ColumnAggregateFunction::updatePermutation(PermutationSortDirection, PermutationSortStability,
diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp
index 10e960ea244..9aa0f5cfa49 100644
--- a/src/Columns/ColumnConst.cpp
+++ b/src/Columns/ColumnConst.cpp
@@ -2,9 +2,10 @@
 
 #include <Columns/ColumnConst.h>
 #include <Columns/ColumnsCommon.h>
-#include <Common/typeid_cast.h>
-#include <Common/WeakHash.h>
 #include <Common/HashTable/Hash.h>
+#include <Common/WeakHash.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 #include <base/defines.h>
 
@@ -128,8 +129,7 @@ void ColumnConst::getPermutation(PermutationSortDirection /*direction*/, Permuta
                                 size_t /*limit*/, int /*nan_direction_hint*/, Permutation & res) const
 {
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 void ColumnConst::updatePermutation(PermutationSortDirection /*direction*/, PermutationSortStability /*stability*/,
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index baccfc69147..20fc5d8e1fe 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -1,10 +1,11 @@
-#include <Common/Exception.h>
 #include <Common/Arena.h>
-#include <Common/SipHash.h>
-#include <Common/assert_cast.h>
-#include <Common/WeakHash.h>
+#include <Common/Exception.h>
 #include <Common/HashTable/Hash.h>
 #include <Common/RadixSort.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/assert_cast.h>
+#include <Common/iota.h>
 
 #include <base/sort.h>
 
@@ -163,8 +164,7 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
     if constexpr (is_arithmetic_v<NativeT> && !is_big_int_v<NativeT>)
     {
@@ -183,8 +183,7 @@ void ColumnDecimal<T>::getPermutation(IColumn::PermutationSortDirection directio
             /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
             if (data_size >= 256 && data_size <= std::numeric_limits<UInt32>::max() && use_radix_sort)
             {
-                for (size_t i = 0; i < data_size; ++i)
-                    res[i] = i;
+                iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
                 bool try_sort = false;
 
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp
index 057c0cd7112..02e6e9e56b4 100644
--- a/src/Columns/ColumnSparse.cpp
+++ b/src/Columns/ColumnSparse.cpp
@@ -1,11 +1,12 @@
-#include <Columns/ColumnSparse.h>
-#include <Columns/ColumnsCommon.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/ColumnSparse.h>
 #include <Columns/ColumnTuple.h>
-#include <Common/WeakHash.h>
-#include <Common/SipHash.h>
-#include <Common/HashTable/Hash.h>
+#include <Columns/ColumnsCommon.h>
 #include <Processors/Transforms/ColumnGathererTransform.h>
+#include <Common/HashTable/Hash.h>
+#include <Common/SipHash.h>
+#include <Common/WeakHash.h>
+#include <Common/iota.h>
 
 #include <algorithm>
 #include <bit>
@@ -499,8 +500,7 @@ void ColumnSparse::getPermutationImpl(IColumn::PermutationSortDirection directio
     res.resize(_size);
     if (offsets->empty())
     {
-        for (size_t i = 0; i < _size; ++i)
-            res[i] = i;
+        iota(res.data(), _size, IColumn::Permutation::value_type(0));
         return;
     }
 
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index d8992125be4..356bb0493d2 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -1,16 +1,17 @@
 #include <Columns/ColumnTuple.h>
 
-#include <base/sort.h>
-#include <Columns/IColumnImpl.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/IColumnImpl.h>
 #include <Core/Field.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <DataTypes/Serializations/SerializationInfoTuple.h>
 #include <IO/Operators.h>
 #include <IO/WriteBufferFromString.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <base/sort.h>
 #include <Common/WeakHash.h>
 #include <Common/assert_cast.h>
+#include <Common/iota.h>
 #include <Common/typeid_cast.h>
-#include <DataTypes/Serializations/SerializationInfoTuple.h>
 
 
 namespace DB
@@ -378,8 +379,7 @@ void ColumnTuple::getPermutationImpl(IColumn::PermutationSortDirection direction
 {
     size_t rows = size();
     res.resize(rows);
-    for (size_t i = 0; i < rows; ++i)
-        res[i] = i;
+    iota(res.data(), rows, IColumn::Permutation::value_type(0));
 
     if (limit >= rows)
         limit = 0;
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 37e62c76596..b1cf449dfde 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -1,24 +1,25 @@
 #include "ColumnVector.h"
 
-#include <Columns/ColumnsCommon.h>
 #include <Columns/ColumnCompressed.h>
+#include <Columns/ColumnsCommon.h>
 #include <Columns/MaskOperations.h>
 #include <Columns/RadixSortHelper.h>
-#include <Processors/Transforms/ColumnGathererTransform.h>
 #include <IO/WriteHelpers.h>
+#include <Processors/Transforms/ColumnGathererTransform.h>
+#include <base/bit_cast.h>
+#include <base/scope_guard.h>
+#include <base/sort.h>
+#include <base/unaligned.h>
 #include <Common/Arena.h>
 #include <Common/Exception.h>
 #include <Common/HashTable/Hash.h>
 #include <Common/NaNUtils.h>
 #include <Common/RadixSort.h>
 #include <Common/SipHash.h>
-#include <Common/WeakHash.h>
 #include <Common/TargetSpecific.h>
+#include <Common/WeakHash.h>
 #include <Common/assert_cast.h>
-#include <base/sort.h>
-#include <base/unaligned.h>
-#include <base/bit_cast.h>
-#include <base/scope_guard.h>
+#include <Common/iota.h>
 
 #include <bit>
 #include <cmath>
@@ -244,8 +245,7 @@ void ColumnVector<T>::getPermutation(IColumn::PermutationSortDirection direction
     if (limit >= data_size)
         limit = 0;
 
-    for (size_t i = 0; i < data_size; ++i)
-        res[i] = i;
+    iota(res.data(), data_size, IColumn::Permutation::value_type(0));
 
     if constexpr (is_arithmetic_v<T> && !is_big_int_v<T>)
     {
diff --git a/src/Columns/IColumnDummy.cpp b/src/Columns/IColumnDummy.cpp
index 01091a87049..7c237536f94 100644
--- a/src/Columns/IColumnDummy.cpp
+++ b/src/Columns/IColumnDummy.cpp
@@ -1,7 +1,8 @@
-#include <Common/Arena.h>
-#include <Core/Field.h>
-#include <Columns/IColumnDummy.h>
 #include <Columns/ColumnsCommon.h>
+#include <Columns/IColumnDummy.h>
+#include <Core/Field.h>
+#include <Common/Arena.h>
+#include <Common/iota.h>
 
 
 namespace DB
@@ -87,8 +88,7 @@ void IColumnDummy::getPermutation(IColumn::PermutationSortDirection /*direction*
                 size_t /*limit*/, int /*nan_direction_hint*/, Permutation & res) const
 {
     res.resize(s);
-    for (size_t i = 0; i < s; ++i)
-        res[i] = i;
+    iota(res.data(), s, IColumn::Permutation::value_type(0));
 }
 
 ColumnPtr IColumnDummy::replicate(const Offsets & offsets) const
diff --git a/src/Columns/tests/gtest_column_stable_permutation.cpp b/src/Columns/tests/gtest_column_stable_permutation.cpp
index df898cffa04..0dabd4d1fc2 100644
--- a/src/Columns/tests/gtest_column_stable_permutation.cpp
+++ b/src/Columns/tests/gtest_column_stable_permutation.cpp
@@ -9,7 +9,6 @@
 #include <Columns/ColumnUnique.h>
 #include <Columns/ColumnVector.h>
 #include <Columns/ColumnsNumber.h>
-
 #include <DataTypes/DataTypeArray.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <DataTypes/DataTypeMap.h>
@@ -17,6 +16,7 @@
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypeTuple.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <Common/iota.h>
 
 
 using namespace DB;
@@ -32,8 +32,7 @@ void stableGetColumnPermutation(
 
     size_t size = column.size();
     out_permutation.resize(size);
-    for (size_t i = 0; i < size; ++i)
-        out_permutation[i] = i;
+    iota(out_permutation.data(), size, IColumn::Permutation::value_type(0));
 
     std::stable_sort(
         out_permutation.begin(),
@@ -146,10 +145,7 @@ void assertColumnPermutations(ColumnCreateFunc column_create_func, ValueTransfor
 
     std::vector<std::vector<Field>> ranges(ranges_size);
     std::vector<size_t> ranges_permutations(ranges_size);
-    for (size_t i = 0; i < ranges_size; ++i)
-    {
-        ranges_permutations[i] = i;
-    }
+    iota(ranges_permutations.data(), ranges_size, IColumn::Permutation::value_type(0));
 
     IColumn::Permutation actual_permutation;
     IColumn::Permutation expected_permutation;
diff --git a/src/Common/levenshteinDistance.cpp b/src/Common/levenshteinDistance.cpp
index 9eb6c0f9050..3ab80af94bb 100644
--- a/src/Common/levenshteinDistance.cpp
+++ b/src/Common/levenshteinDistance.cpp
@@ -1,5 +1,6 @@
-#include <Common/levenshteinDistance.h>
 #include <Common/PODArray.h>
+#include <Common/iota.h>
+#include <Common/levenshteinDistance.h>
 
 namespace DB
 {
@@ -11,8 +12,7 @@ size_t levenshteinDistance(const String & lhs, const String & rhs)
 
     PODArrayWithStackMemory<size_t, 64> row(n + 1);
 
-    for (size_t i = 1; i <= n; ++i)
-        row[i] = i;
+    iota(row.data() + 1, n, size_t(1));
 
     for (size_t j = 1; j <= m; ++j)
     {
diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp
index 3098d02630a..a5e819179d6 100644
--- a/src/Functions/FunctionsStringDistance.cpp
+++ b/src/Functions/FunctionsStringDistance.cpp
@@ -6,6 +6,7 @@
 #include <Functions/FunctionsStringSimilarity.h>
 #include <Common/PODArray.h>
 #include <Common/UTF8Helpers.h>
+#include <Common/iota.h>
 
 #ifdef __SSE4_2__
 #    include <nmmintrin.h>
@@ -246,8 +247,7 @@ struct ByteEditDistanceImpl
         ResultType insertion = 0;
         ResultType deletion = 0;
 
-        for (size_t i = 0; i <= haystack_size; ++i)
-            distances0[i] = i;
+        iota(distances0.data(), haystack_size + 1, ResultType(0));
 
         for (size_t pos_needle = 0; pos_needle < needle_size; ++pos_needle)
         {
diff --git a/src/Functions/array/arraySort.cpp b/src/Functions/array/arraySort.cpp
index a853289e8cc..184b1f82280 100644
--- a/src/Functions/array/arraySort.cpp
+++ b/src/Functions/array/arraySort.cpp
@@ -1,5 +1,6 @@
-#include <Functions/array/arraySort.h>
 #include <Functions/FunctionFactory.h>
+#include <Functions/array/arraySort.h>
+#include <Common/iota.h>
 
 namespace DB
 {
@@ -55,9 +56,7 @@ ColumnPtr ArraySortImpl<positive, is_partial>::execute(
     size_t size = offsets.size();
     size_t nested_size = array.getData().size();
     IColumn::Permutation permutation(nested_size);
-
-    for (size_t i = 0; i < nested_size; ++i)
-        permutation[i] = i;
+    iota(permutation.data(), nested_size, IColumn::Permutation::value_type(0));
 
     ColumnArray::Offset current_offset = 0;
     for (size_t i = 0; i < size; ++i)
diff --git a/src/Functions/rowNumberInBlock.cpp b/src/Functions/rowNumberInBlock.cpp
index e5fe2aeb178..25c9e9c56f3 100644
--- a/src/Functions/rowNumberInBlock.cpp
+++ b/src/Functions/rowNumberInBlock.cpp
@@ -56,8 +56,7 @@ public:
         auto column = ColumnUInt64::create();
         auto & data = column->getData();
         data.resize(input_rows_count);
-        for (size_t i = 0; i < input_rows_count; ++i)
-            data[i] = i;
+        iota(data.data(), input_rows_count, UInt64(0));
 
         return column;
     }
diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp
index 89c4220ccdf..d75786f33b9 100644
--- a/src/Interpreters/sortBlock.cpp
+++ b/src/Interpreters/sortBlock.cpp
@@ -4,6 +4,7 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnTuple.h>
 #include <Functions/FunctionHelpers.h>
+#include <Common/iota.h>
 
 #ifdef __SSE2__
     #include <emmintrin.h>
@@ -155,8 +156,7 @@ void getBlockSortPermutationImpl(const Block & block, const SortDescription & de
     {
         size_t size = block.rows();
         permutation.resize(size);
-        for (size_t i = 0; i < size; ++i)
-            permutation[i] = i;
+        iota(permutation.data(), size, IColumn::Permutation::value_type(0));
 
         if (limit >= size)
             limit = 0;
diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp
index 3fc9a4e71db..e79673f6645 100644
--- a/src/Processors/Transforms/PartialSortingTransform.cpp
+++ b/src/Processors/Transforms/PartialSortingTransform.cpp
@@ -1,7 +1,8 @@
-#include <Processors/Transforms/PartialSortingTransform.h>
-#include <Interpreters/sortBlock.h>
 #include <Core/SortCursor.h>
+#include <Interpreters/sortBlock.h>
+#include <Processors/Transforms/PartialSortingTransform.h>
 #include <Common/PODArray.h>
+#include <Common/iota.h>
 
 namespace DB
 {
@@ -36,9 +37,7 @@ size_t getFilterMask(const ColumnRawPtrs & raw_block_columns, const Columns & th
     else
     {
         rows_to_compare.resize(num_rows);
-
-        for (size_t i = 0; i < num_rows; ++i)
-            rows_to_compare[i] = i;
+        iota(rows_to_compare.data(), num_rows, UInt64(0));
 
         size_t size = description.size();
         for (size_t i = 0; i < size; ++i)
diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp
index a0fabe3273c..46c6a77f60f 100644
--- a/src/QueryPipeline/QueryPipelineBuilder.cpp
+++ b/src/QueryPipeline/QueryPipelineBuilder.cpp
@@ -1,14 +1,12 @@
 #include <QueryPipeline/QueryPipelineBuilder.h>
 
-#include <Common/CurrentThread.h>
-#include <Common/typeid_cast.h>
-#include "Core/UUID.h"
 #include <Core/SortDescription.h>
+#include <Core/UUID.h>
+#include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/IJoin.h>
 #include <Interpreters/TableJoin.h>
-#include <IO/WriteHelpers.h>
 #include <Processors/ConcatProcessor.h>
 #include <Processors/DelayedPortsProcessor.h>
 #include <Processors/Executors/PipelineExecutor.h>
@@ -25,11 +23,14 @@
 #include <Processors/Transforms/ExtremesTransform.h>
 #include <Processors/Transforms/JoiningTransform.h>
 #include <Processors/Transforms/MergeJoinTransform.h>
-#include <Processors/Transforms/PasteJoinTransform.h>
 #include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
 #include <Processors/Transforms/PartialSortingTransform.h>
+#include <Processors/Transforms/PasteJoinTransform.h>
 #include <Processors/Transforms/TotalsHavingTransform.h>
 #include <QueryPipeline/narrowPipe.h>
+#include <Common/CurrentThread.h>
+#include <Common/iota.h>
+#include <Common/typeid_cast.h>
 
 namespace DB
 {
@@ -619,8 +620,7 @@ void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
     bool has_extremes = pipe.getExtremesPort();
     size_t num_extra_ports = (has_totals ? 1 : 0) + (has_extremes ? 1 : 0);
     IProcessor::PortNumbers delayed_streams(pipe.numOutputPorts() + num_extra_ports);
-    for (size_t i = 0; i < delayed_streams.size(); ++i)
-        delayed_streams[i] = i;
+    iota(delayed_streams.data(), delayed_streams.size(), IProcessor::PortNumbers::value_type(0));
 
     auto * collected_processors = pipe.collected_processors;
 

From 22ef5443bb0d0c2a1e6c2fa2b178765dc3cb761b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 14:44:16 +0100
Subject: [PATCH 039/105] Move findNumeric to .cpp

---
 .../AggregateFunctionMax.cpp                  | 10 +--
 .../AggregateFunctionMin.cpp                  | 10 +--
 src/AggregateFunctions/findNumeric.cpp        | 15 -----
 .../findNumeric.h => Common/findExtreme.cpp}  | 65 ++++++++-----------
 src/Common/findExtreme.h                      | 45 +++++++++++++
 5 files changed, 82 insertions(+), 63 deletions(-)
 delete mode 100644 src/AggregateFunctions/findNumeric.cpp
 rename src/{AggregateFunctions/findNumeric.h => Common/findExtreme.cpp} (57%)
 create mode 100644 src/Common/findExtreme.h

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index a440aedb62c..3d4d23136a1 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,7 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
-#include <AggregateFunctions/findNumeric.h>
+#include <Common/findExtreme.h>
 
 namespace DB
 {
@@ -53,10 +53,10 @@ void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<Sing
     if (if_argument_pos >= 0) \
     { \
         const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
-        opt = findNumericMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
+        opt = findExtremeMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \
     } \
     else \
-        opt = findNumericMax(column.getData().data(), row_begin, row_end); \
+        opt = findExtremeMax(column.getData().data(), row_begin, row_end); \
     if (opt.has_value()) \
         this->data(place).changeIfGreater(opt.value()); \
 }
@@ -140,10 +140,10 @@ void AggregateFunctionsSingleValueMax<typename DB::AggregateFunctionMaxData<Sing
         auto final_flags = std::make_unique<UInt8[]>(row_end); \
         for (size_t i = row_begin; i < row_end; ++i) \
             final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
-        opt = findNumericMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
+        opt = findExtremeMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
     } \
     else \
-        opt = findNumericMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
+        opt = findExtremeMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \
     if (opt.has_value()) \
         this->data(place).changeIfGreater(opt.value()); \
 }
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 8d5d12fa626..02d041ad12b 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,7 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
-#include <AggregateFunctions/findNumeric.h>
+#include <Common/findExtreme.h>
 
 
 namespace DB
@@ -54,10 +54,10 @@ public:
         if (if_argument_pos >= 0) \
         { \
             const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData(); \
-            opt = findNumericMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
+            opt = findExtremeMinIf(column.getData().data(), flags.data(), row_begin, row_end); \
         } \
         else \
-            opt = findNumericMin(column.getData().data(), row_begin, row_end); \
+            opt = findExtremeMin(column.getData().data(), row_begin, row_end); \
         if (opt.has_value()) \
             this->data(place).changeIfLess(opt.value()); \
     }
@@ -141,10 +141,10 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
             auto final_flags = std::make_unique<UInt8[]>(row_end); \
             for (size_t i = row_begin; i < row_end; ++i) \
                 final_flags[i] = (!null_map[i]) & !!if_flags[i]; \
-            opt = findNumericMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
+            opt = findExtremeMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \
         } \
         else \
-            opt = findNumericMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
+            opt = findExtremeMinNotNull(column.getData().data(), null_map, row_begin, row_end); \
         if (opt.has_value()) \
             this->data(place).changeIfLess(opt.value()); \
     }
diff --git a/src/AggregateFunctions/findNumeric.cpp b/src/AggregateFunctions/findNumeric.cpp
deleted file mode 100644
index bbad8c1fe3d..00000000000
--- a/src/AggregateFunctions/findNumeric.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <AggregateFunctions/findNumeric.h>
-
-namespace DB
-{
-#define INSTANTIATION(T) \
-    template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
-    template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
-    template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
-
-FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
-#undef INSTANTIATION
-}
diff --git a/src/AggregateFunctions/findNumeric.h b/src/Common/findExtreme.cpp
similarity index 57%
rename from src/AggregateFunctions/findNumeric.h
rename to src/Common/findExtreme.cpp
index df7c325569a..e1f1e199d56 100644
--- a/src/AggregateFunctions/findNumeric.h
+++ b/src/Common/findExtreme.cpp
@@ -1,18 +1,9 @@
-#pragma once
-
 #include <DataTypes/IDataType.h>
-#include <base/defines.h>
-#include <base/types.h>
-#include <Common/Concepts.h>
 #include <Common/TargetSpecific.h>
-
-#include <algorithm>
-#include <optional>
+#include <Common/findExtreme.h>
 
 namespace DB
 {
-template <typename T>
-concept is_any_native_number = (is_any_of<T, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64>);
 
 template <is_any_native_number T>
 struct MinComparator
@@ -28,7 +19,7 @@ struct MaxComparator
 
 MULTITARGET_FUNCTION_AVX2_SSE42(
     MULTITARGET_FUNCTION_HEADER(template <is_any_native_number T, typename ComparatorClass, bool add_all_elements, bool add_if_cond_zero> static std::optional<T> NO_INLINE),
-    findNumericExtremeImpl,
+    findExtremeImpl,
     MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end)
     {
         size_t count = row_end - row_begin;
@@ -86,69 +77,67 @@ MULTITARGET_FUNCTION_AVX2_SSE42(
     }
 ))
 
-
 /// Given a vector of T finds the extreme (MIN or MAX) value
 template <is_any_native_number T, class ComparatorClass, bool add_all_elements, bool add_if_cond_zero>
 static std::optional<T>
-findNumericExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end)
+findExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end)
 {
 #if USE_MULTITARGET_CODE
     /// We see no benefit from using AVX512BW or AVX512F (over AVX2), so we only declare SSE and AVX2
     if (isArchSupported(TargetArch::AVX2))
-        return findNumericExtremeImplAVX2<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+        return findExtremeImplAVX2<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 
     if (isArchSupported(TargetArch::SSE42))
-        return findNumericExtremeImplSSE42<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+        return findExtremeImplSSE42<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 #endif
-    return findNumericExtremeImpl<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
+    return findExtremeImpl<T, ComparatorClass, add_all_elements, add_if_cond_zero>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end)
+std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, true, false>(ptr, nullptr, start, end);
+    return findExtreme<T, MinComparator<T>, true, false>(ptr, nullptr, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, false, true>(ptr, condition_map, start, end);
+    return findExtreme<T, MinComparator<T>, false, true>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MinComparator<T>, false, false>(ptr, condition_map, start, end);
+    return findExtreme<T, MinComparator<T>, false, false>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end)
+std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, true, false>(ptr, nullptr, start, end);
+    return findExtreme<T, MaxComparator<T>, true, false>(ptr, nullptr, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, false, true>(ptr, condition_map, start, end);
+    return findExtreme<T, MaxComparator<T>, false, true>(ptr, condition_map, start, end);
 }
 
 template <is_any_native_number T>
-std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
 {
-    return findNumericExtreme<T, MaxComparator<T>, false, false>(ptr, condition_map, start, end);
+    return findExtreme<T, MaxComparator<T>, false, false>(ptr, condition_map, start, end);
 }
 
 
-#define EXTERN_INSTANTIATION(T) \
-    extern template std::optional<T> findNumericMin(const T * __restrict ptr, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMax(const T * __restrict ptr, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
-    extern template std::optional<T> findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
-
-    FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION)
-#undef EXTERN_INSTANTIATION
+#define INSTANTIATION(T) \
+    template std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end); \
+    template std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end); \
+    template std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    template std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
 
+FOR_BASIC_NUMERIC_TYPES(INSTANTIATION)
+#undef INSTANTIATION
 }
diff --git a/src/Common/findExtreme.h b/src/Common/findExtreme.h
new file mode 100644
index 00000000000..b38c24697c0
--- /dev/null
+++ b/src/Common/findExtreme.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+#include <base/defines.h>
+#include <base/types.h>
+#include <Common/Concepts.h>
+
+#include <algorithm>
+#include <optional>
+
+namespace DB
+{
+template <typename T>
+concept is_any_native_number = (is_any_of<T, Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64>);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+template <is_any_native_number T>
+std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+#define EXTERN_INSTANTIATION(T) \
+    extern template std::optional<T> findExtremeMin(const T * __restrict ptr, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMax(const T * __restrict ptr, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \
+    extern template std::optional<T> findExtremeMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end);
+
+    FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION)
+#undef EXTERN_INSTANTIATION
+
+}

From 5fb7f9f861ea5adaece97c1afbd4ba1283957049 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Fri, 29 Dec 2023 18:09:20 +0100
Subject: [PATCH 040/105] Ignore other numeric types for now

---
 src/AggregateFunctions/AggregateFunctionMax.cpp | 14 ++++++++++++++
 src/AggregateFunctions/AggregateFunctionMin.cpp | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index 3d4d23136a1..2577c932592 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -74,6 +74,13 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Anything that is neither the String nor the Generic state (large integers, decimals, etc)
+        /// keeps doing the plain comparison, as it's faster than doing a permutation
+        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
@@ -162,6 +169,13 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Anything that is neither the String nor the Generic state (large integers, decimals, etc)
+        /// keeps doing the plain comparison, as it's faster than doing a permutation
+        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 02d041ad12b..701101e7207 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -75,6 +75,13 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Anything that is neither the String nor the Generic state (large integers, decimals, etc)
+        /// keeps doing the plain comparison, as it's faster than doing a permutation
+        return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)
@@ -163,6 +170,13 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
+    if constexpr (!std::is_same_v<Data, SingleValueDataString> && !std::is_same_v<Data, SingleValueDataGeneric>)
+    {
+        /// Anything that is neither the String nor the Generic state (large integers, decimals, etc)
+        /// keeps doing the plain comparison, as it's faster than doing a permutation
+        return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos);
+    }
+
     constexpr int nan_direction_hint = 1;
     auto const & column = *columns[0];
     if (if_argument_pos >= 0)

From b95bdef09ee9474193beaba8c6eab078bb9970eb Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Fri, 29 Dec 2023 17:41:11 +0000
Subject: [PATCH 041/105] Update StorageS3 and StorageS3Cluster

---
 src/Storages/HDFS/StorageHDFS.cpp       |  17 +--
 src/Storages/S3Queue/StorageS3Queue.cpp | 117 +++++++++++++++---
 src/Storages/S3Queue/StorageS3Queue.h   |  11 +-
 src/Storages/StorageFile.cpp            |   4 +-
 src/Storages/StorageS3.cpp              | 152 ++++++++++++++++++------
 src/Storages/StorageS3.h                |  14 ++-
 src/Storages/VirtualColumnUtils.cpp     |   6 +-
 src/Storages/VirtualColumnUtils.h       |   2 +-
 8 files changed, 238 insertions(+), 85 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 9d719413c8d..fe37b2eb57a 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -417,7 +417,7 @@ public:
         uris = getPathsList(path_from_uri, uri_without_path, context);
         ActionsDAGPtr filter_dag;
         if (!uris.empty())
-             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, uris[0].path);
+             filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
         if (filter_dag)
         {
@@ -492,7 +492,7 @@ public:
     {
         ActionsDAGPtr filter_dag;
         if (!uris.empty())
-            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, getPathFromUriAndUriWithoutPath(uris[0]).first);
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
         if (filter_dag)
         {
@@ -893,9 +893,6 @@ public:
         ReadFromFormatInfo info_,
         bool need_only_count_,
         std::shared_ptr<StorageHDFS> storage_,
-        // StorageSnapshotPtr storage_snapshot_,
-        // const StorageEmbeddedRocksDB & storage_,
-        // SelectQueryInfo query_info_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
@@ -907,9 +904,6 @@ public:
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , storage(std::move(storage_))
-        // , storage_snapshot(std::move(storage_snapshot_))
-        // , storage(storage_)
-        // , query_info(std::move(query_info_))
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -925,19 +919,12 @@ private:
     const bool need_only_count;
     std::shared_ptr<StorageHDFS> storage;
 
-    // StorageSnapshotPtr storage_snapshot;
-    // const StorageEmbeddedRocksDB & storage;
-    // SelectQueryInfo query_info;
     ContextPtr context;
-
     size_t max_block_size;
     size_t num_streams;
 
     std::shared_ptr<HDFSSource::IteratorWrapper> iterator_wrapper;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 33e63d45c8d..1a6666c00d0 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,3 +1,6 @@
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
 #if USE_AWS_S3
@@ -204,10 +207,71 @@ bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const
     return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings);
 }
 
-Pipe StorageS3Queue::read(
+class ReadFromS3Queue : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromS3Queue"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromS3Queue(
+        Block sample_block,
+        ReadFromFormatInfo info_,
+        std::shared_ptr<StorageS3Queue> storage_,
+        // StorageSnapshotPtr storage_snapshot_,
+        // Names column_names_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , info(std::move(info_))
+        , storage(std::move(storage_))
+        // , storage_snapshot(std::move(storage_snapshot_))
+        // , column_names(std::move(column_names_))
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    ReadFromFormatInfo info;
+    std::shared_ptr<StorageS3Queue> storage;
+    // StorageSnapshotPtr storage_snapshot;
+    // Names column_names;
+    ContextPtr context;
+    size_t max_block_size;
+    size_t num_streams;
+
+    std::shared_ptr<StorageS3Queue::FileIterator> iterator;
+
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+void ReadFromS3Queue::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator)
+        return;
+
+    iterator = storage->createFileIterator(context, predicate);
+}
+
+
+void ReadFromS3Queue::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createIterator(predicate);
+}
+
+void StorageS3Queue::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
-    SelectQueryInfo & query_info,
+    SelectQueryInfo & /*query_info*/,
     ContextPtr local_context,
     QueryProcessingStage::Enum /*processed_stage*/,
     size_t max_block_size,
@@ -225,27 +289,43 @@ Pipe StorageS3Queue::read(
                         "Cannot read from {} with attached materialized views", getName());
     }
 
-    Pipes pipes;
-    const size_t adjusted_num_streams = std::min<size_t>(num_streams, s3queue_settings->s3queue_processing_threads_num);
+    auto this_ptr = std::static_pointer_cast<StorageS3Queue>(shared_from_this());
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    auto file_iterator = createFileIterator(local_context, query_info.query);
+    auto reading = std::make_unique<ReadFromS3Queue>(
+        read_from_format_info.source_header,
+        read_from_format_info,
+        std::move(this_ptr),
+        // storage_snapshot,
+        // column_names,
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    Pipes pipes;
+    const size_t adjusted_num_streams = std::min<size_t>(num_streams, storage->s3queue_settings->s3queue_processing_threads_num);
+
+    createIterator(nullptr);
     for (size_t i = 0; i < adjusted_num_streams; ++i)
-        pipes.emplace_back(createSource(file_iterator, column_names, storage_snapshot, max_block_size, local_context));
-    return Pipe::unitePipes(std::move(pipes));
+        pipes.emplace_back(storage->createSource(info, iterator, max_block_size, context));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
+    const ReadFromFormatInfo & info,
     std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
-    const Names & column_names,
-    const StorageSnapshotPtr & storage_snapshot,
     size_t max_block_size,
     ContextPtr local_context)
 {
     auto configuration_snapshot = updateConfigurationAndGetCopy(local_context);
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
     auto internal_source = std::make_unique<StorageS3Source>(
-        read_from_format_info, configuration.format, getName(), local_context, format_settings,
+        info, configuration.format, getName(), local_context, format_settings,
         max_block_size,
         configuration_snapshot.request_settings,
         configuration_snapshot.compression_method,
@@ -253,7 +333,7 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
         configuration_snapshot.url.bucket,
         configuration_snapshot.url.version_id,
         configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()),
-        file_iterator, local_context->getSettingsRef().max_download_threads, false, /* query_info */ std::nullopt);
+        file_iterator, local_context->getSettingsRef().max_download_threads, false);
 
     auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable
     {
@@ -277,8 +357,8 @@ std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
     };
     auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? local_context->getS3QueueLog() : nullptr;
     return std::make_shared<StorageS3QueueSource>(
-        getName(), read_from_format_info.source_header, std::move(internal_source),
-        files_metadata, after_processing, file_deleter, read_from_format_info.requested_virtual_columns,
+        getName(), info.source_header, std::move(internal_source),
+        files_metadata, after_processing, file_deleter, info.requested_virtual_columns,
         local_context, shutdown_called, table_is_being_dropped, s3_queue_log, getStorageID(), log);
 }
 
@@ -375,13 +455,14 @@ bool StorageS3Queue::streamToViews()
     auto block_io = interpreter.execute();
     auto file_iterator = createFileIterator(s3queue_context, nullptr);
 
+    auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals());
+
     Pipes pipes;
     pipes.reserve(s3queue_settings->s3queue_processing_threads_num);
     for (size_t i = 0; i < s3queue_settings->s3queue_processing_threads_num; ++i)
     {
         auto source = createSource(
-            file_iterator, block_io.pipeline.getHeader().getNames(),
-            storage_snapshot, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context);
+            read_from_format_info, file_iterator, DBMS_DEFAULT_BUFFER_SIZE, s3queue_context);
 
         pipes.emplace_back(std::move(source));
     }
@@ -479,10 +560,10 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
     }
 }
 
-std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, ASTPtr query)
+std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
 {
     auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
-        *configuration.client, configuration.url, query, virtual_columns, local_context,
+        *configuration.client, configuration.url, predicate, virtual_columns, local_context,
         /* read_keys */nullptr, configuration.request_settings);
     return std::make_shared<FileIterator>(files_metadata, std::move(glob_iterator), shutdown_called);
 }
diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h
index f26b1175150..3d3594dc2ab 100644
--- a/src/Storages/S3Queue/StorageS3Queue.h
+++ b/src/Storages/S3Queue/StorageS3Queue.h
@@ -39,10 +39,11 @@ public:
 
     String getName() const override { return "S3Queue"; }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
-        SelectQueryInfo & query_info,
+        SelectQueryInfo & /*query_info*/,
         ContextPtr context,
         QueryProcessingStage::Enum processed_stage,
         size_t max_block_size,
@@ -57,6 +58,7 @@ public:
     zkutil::ZooKeeperPtr getZooKeeper() const;
 
 private:
+    friend class ReadFromS3Queue;
     using FileIterator = StorageS3QueueSource::FileIterator;
 
     const std::unique_ptr<S3QueueSettings> s3queue_settings;
@@ -85,11 +87,10 @@ private:
     bool supportsSubsetOfColumns(const ContextPtr & context_) const;
     bool supportsSubcolumns() const override { return true; }
 
-    std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, ASTPtr query);
+    std::shared_ptr<FileIterator> createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate);
     std::shared_ptr<StorageS3QueueSource> createSource(
+        const ReadFromFormatInfo & info,
         std::shared_ptr<StorageS3Queue::FileIterator> file_iterator,
-        const Names & column_names,
-        const StorageSnapshotPtr & storage_snapshot,
         size_t max_block_size,
         ContextPtr local_context);
 
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b040f452410..e4619d64ae3 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -960,8 +960,8 @@ StorageFileSource::FilesIterator::FilesIterator(
     : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
 {
     ActionsDAGPtr filter_dag;
-    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
-        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns, files[0]);
+    if (!distributed_processing && !archive_info && !files.empty())
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
     if (filter_dag)
         VirtualColumnUtils::filterByPathOrFile(files, files, filter_dag, virtual_columns, context_);
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 096e2e88f91..780a2755bcf 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -159,6 +159,8 @@ public:
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
     {
+        query_configuration = storage.updateConfigurationAndGetCopy(local_context);
+        virtual_columns = storage.getVirtuals();
     }
 
 private:
@@ -166,10 +168,17 @@ private:
     StorageSnapshotPtr storage_snapshot;
     StorageS3 & storage;
     SelectQueryInfo query_info;
+    StorageS3::Configuration query_configuration;
+    NamesAndTypesList virtual_columns;
+
     ContextPtr local_context;
 
     size_t max_block_size;
     size_t num_streams;
+
+    std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper;
+
+    void createIterator(const ActionsDAG::Node * predicate);
 };
 
 
@@ -231,24 +240,14 @@ static std::vector<String> filterKeysForPartitionPruning(
     const std::vector<String> & keys,
     const String & bucket,
     const NamesAndTypesList & virtual_columns,
-    const std::vector<ActionsDAGPtr> & filter_dags,
+    const ActionsDAG::Node * predicate,
     ContextPtr context)
 {
     std::unordered_set<String> result_keys(keys.begin(), keys.end());
-    for (const auto & filter_dag : filter_dags)
-    {
-        if (result_keys.empty())
-            break;
 
-        auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
-
-        auto filter_actions = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), block);
-        if (!filter_actions)
-            continue;
-        VirtualColumnUtils::filterBlockWithDAG(filter_actions, block, context);
-
-        result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
-    }
+    auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
+    VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context);
+    result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
 
     LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Applied partition pruning {} from {} keys left", result_keys.size(), keys.size());
     return std::vector<String>(result_keys.begin(), result_keys.end());
@@ -309,6 +308,57 @@ public:
         fillInternalBufferAssumeLocked();
     }
 
+    /// Iterator state for the glob in `globbed_uri`; a non-null `predicate` is used to prune keys by `_path`/`_file`.
+    Impl(
+        const S3::Client & client_,
+        const S3::URI & globbed_uri_,
+        const ActionsDAG::Node * predicate,
+        const NamesAndTypesList & virtual_columns_,
+        ContextPtr context_,
+        KeysWithInfo * read_keys_,
+        const S3Settings::RequestSettings & request_settings_,
+        std::function<void(FileProgress)> file_progress_callback_)
+        : WithContext(context_)
+        , client(client_.clone())
+        , globbed_uri(globbed_uri_)
+        , virtual_columns(virtual_columns_)
+        , read_keys(read_keys_)
+        , request_settings(request_settings_)
+        , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
+        , list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
+        , file_progress_callback(file_progress_callback_)
+    {
+        if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
+            throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
+
+        const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
+
+        /// The key has no glob characters (`*`, `?`, `{`): it is used as is and the bucket does not have to be listed.
+        if (key_prefix.size() == globbed_uri.key.size())
+        {
+            buffer.emplace_back(std::make_shared<KeyWithInfo>(globbed_uri.key, std::nullopt));
+            buffer_iter = buffer.begin();
+            is_finished = true;
+            return;
+        }
+
+        request.SetBucket(globbed_uri.bucket);
+        request.SetPrefix(key_prefix);
+        request.SetMaxKeys(static_cast<int>(request_settings.list_object_keys_size));
+        outcome_future = listObjectsAsync();
+
+        matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(globbed_uri.key));
+        if (!matcher->ok())
+            throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
+                "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
+
+        recursive = globbed_uri.key == "/**" ? true : false;
+        /// Build the filter before the first fill: otherwise the first listing batch would be buffered unfiltered.
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        fillInternalBufferAssumeLocked();
+        is_initialized = true;
+    }
+
     KeyWithInfoPtr next()
     {
         std::lock_guard lock(mutex);
@@ -439,6 +489,15 @@ private:
 
             VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast);
         }
+        else if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(temp_buffer.size());
+            for (const auto & key_with_info : temp_buffer)
+                paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key);
+
+            VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext());
+        }
 
         buffer = std::move(temp_buffer);
 
@@ -481,6 +540,7 @@ private:
     NamesAndTypesList virtual_columns;
     bool is_initialized{false};
     ASTPtr filter_ast;
+    ActionsDAGPtr filter_dag;
     std::unique_ptr<re2::RE2> matcher;
     bool recursive{false};
     bool is_finished{false};
@@ -508,6 +568,19 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
 {
 }
 
+StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
+    const S3::Client & client_,
+    const S3::URI & globbed_uri_,
+    const ActionsDAG::Node * predicate, /// May be null; forwarded to Impl, which builds the `_path`/`_file` filter from it.
+    const NamesAndTypesList & virtual_columns_,
+    ContextPtr context,
+    KeysWithInfo * read_keys_,
+    const S3Settings::RequestSettings & request_settings_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_))
+{
+}
+
 StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next()
 {
     return pimpl->next();
@@ -646,8 +719,7 @@ StorageS3Source::StorageS3Source(
     const String & url_host_and_port_,
     std::shared_ptr<IIterator> file_iterator_,
     const size_t max_parsing_threads_,
-    bool need_only_count_,
-    std::optional<SelectQueryInfo> query_info_)
+    bool need_only_count_)
     : SourceWithKeyCondition(info.source_header, false)
     , WithContext(context_)
     , name(std::move(name_))
@@ -663,7 +735,6 @@ StorageS3Source::StorageS3Source(
     , client(client_)
     , sample_block(info.format_header)
     , format_settings(format_settings_)
-    , query_info(std::move(query_info_))
     , requested_virtual_columns(info.requested_virtual_columns)
     , file_iterator(file_iterator_)
     , max_parsing_threads(max_parsing_threads_)
@@ -1151,8 +1222,7 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     const StorageS3::Configuration & configuration,
     bool distributed_processing,
     ContextPtr local_context,
-    ASTPtr query,
-    const std::vector<ActionsDAGPtr> & filter_dags,
+    const ActionsDAG::Node * predicate,
     const NamesAndTypesList & virtual_columns,
     StorageS3::KeysWithInfo * read_keys = nullptr,
     std::function<void(FileProgress)> file_progress_callback = {})
@@ -1165,12 +1235,12 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     {
         /// Iterate through disclosed globs and make a source for each file
         return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-            *configuration.client, configuration.url, query, virtual_columns,
+            *configuration.client, configuration.url, predicate, virtual_columns,
             local_context, read_keys, configuration.request_settings, file_progress_callback);
     }
     else
     {
-        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, filter_dags, local_context);
+        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, predicate, local_context);
         return std::make_shared<StorageS3Source::KeysIterator>(
             *configuration.client, configuration.url.version_id, keys,
             configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback);
@@ -1217,19 +1287,34 @@ void StorageS3::read(
     query_plan.addStep(std::move(reading));
 }
 
+/// Combines the pushed-down filter nodes into one DAG and creates the file iterator from its first output node.
+void ReadFromStorageS3Step::applyFilters()
+{
+    const auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, local_context);
+
+    /// Without a DAG there is no predicate, and the iterator is created with a null one.
+    const ActionsDAG::Node * root = dag ? dag->getOutputs().at(0) : nullptr;
+    createIterator(root);
+}
+
+/// Creates the file iterator on the first call only; later calls keep the existing iterator,
+/// so a predicate passed after the iterator exists is ignored.
+void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (!iterator_wrapper)
+        iterator_wrapper = createFileIterator(
+            query_configuration, storage.distributed_processing, local_context,
+            predicate, virtual_columns, /*read_keys=*/ nullptr, local_context->getFileProgressCallback());
+}
+
 void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    auto query_configuration = storage.updateConfigurationAndGetCopy(local_context);
-
     if (storage.partition_by && query_configuration.withWildcard())
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet");
 
-    auto virtual_columns = storage.getVirtuals();
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
+    createIterator(nullptr);
 
-    std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
-        query_configuration, storage.distributed_processing, local_context, query_info.query, filter_dags,
-        virtual_columns, nullptr, local_context->getFileProgressCallback());
+    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
 
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     if (estimated_keys_count > 1)
@@ -1264,19 +1349,12 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
             iterator_wrapper,
             max_parsing_threads,
-            need_only_count,
-            query_info));
+            need_only_count));
     }
 
     pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
-
-void ReadFromStorageS3Step::applyFilters()
-{
-    /// We will use filter_dags in filterKeysForPartitionPruning called from initializePipeline, nothing to do here
-}
-
 SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
 {
     auto query_configuration = updateConfigurationAndGetCopy(local_context);
@@ -1853,7 +1931,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(
 {
     KeysWithInfo read_keys;
 
-    auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, {}, &read_keys);
+    auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys);
 
     ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx);
     return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx);
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index 07d965d8bb3..dd7e0edb2d9 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -85,6 +85,16 @@ public:
             const S3Settings::RequestSettings & request_settings_ = {},
             std::function<void(FileProgress)> progress_callback_ = {});
 
+        DisclosedGlobIterator(
+            const S3::Client & client_,
+            const S3::URI & globbed_uri_,
+            const ActionsDAG::Node * predicate, /// Root node of the filter DAG; may be null, then keys are not filtered.
+            const NamesAndTypesList & virtual_columns,
+            ContextPtr context,
+            KeysWithInfo * read_keys_ = nullptr,
+            const S3Settings::RequestSettings & request_settings_ = {},
+            std::function<void(FileProgress)> progress_callback_ = {});
+
         KeyWithInfoPtr next() override;
         size_t estimatedKeysCount() override;
 
@@ -145,8 +155,7 @@ public:
         const String & url_host_and_port,
         std::shared_ptr<IIterator> file_iterator_,
         size_t max_parsing_threads,
-        bool need_only_count_,
-        std::optional<SelectQueryInfo> query_info);
+        bool need_only_count_);
 
     ~StorageS3Source() override;
 
@@ -180,7 +189,6 @@ private:
     std::shared_ptr<const S3::Client> client;
     Block sample_block;
     std::optional<FormatSettings> format_settings;
-    std::optional<SelectQueryInfo> query_info;
 
     struct ReaderHolder
     {
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index 7690e160255..b63b4e7cca7 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -390,7 +390,7 @@ static void addPathAndFileToVirtualColumns(Block & block, const String & path, s
     block.getByName("_idx").column->assumeMutableRef().insert(idx);
 }
 
-ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example)
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns)
 {
     if (!predicate || virtual_columns.empty())
         return {};
@@ -401,10 +401,8 @@ ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, con
         if (column.name == "_file" || column.name == "_path")
             block.insert({column.type->createColumn(), column.type, column.name});
     }
-    /// Create a block with one row to construct filter
-    /// Append "idx" column as the filter result
+
     block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-    addPathAndFileToVirtualColumns(block, path_example, 0);
     return splitFilterDagForAllowedInputs(predicate, block);
 }
 
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index 4f9636b4213..6e1af0995cc 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -77,7 +77,7 @@ void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & pa
     sources = std::move(filtered_sources);
 }
 
-ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const String & path_example);
+ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns);
 
 ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 

From 47c3696a46a19674e04c6a1099e0a1429b90933e Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Fri, 29 Dec 2023 20:41:33 +0100
Subject: [PATCH 042/105] Fix build

---
 src/Interpreters/DDLTask.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index e7796c5d3a5..85bf6fec655 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -221,7 +221,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
     std::exception_ptr first_exception = nullptr;
 
     const auto maybe_secure_port = global_context->getTCPPortSecure();
-    const auto port = global_context->getTCPPort()
+    const auto port = global_context->getTCPPort();
 
     if (config_host_name)
     {

From a4ac45f2ccc4a737f20dfe5a97f8b2085f4b6a24 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sat, 30 Dec 2023 10:56:55 +0100
Subject: [PATCH 043/105] Fix 02943_rmt_alter_metadata_merge_checksum_mismatch
 flakiness

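Disable keeper fault injection so that part names stay deterministic. With
injection enabled the commit can be retried and the part gets a different
name (all_3_3_0 instead of all_1_1_0), as happened here [1]: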

    azat@s1:~/ch/tmp/57755 [1] {elapsed: 301s}$ zstd -cdq clickhouse-server.log.zst | grep c287beae-b56e-4193-b4c2-812ca5c52919
    2023.12.30 03:20:15.984668 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> executeQuery: (from [::1]:34846) (comment: 02943_rmt_alter_metadata_merge_checksum_mismatch.sh) insert into data_r2 (key) values  (stage: Complete)
    2023.12.30 03:20:15.987023 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> ContextAccess (default): Access granted: INSERT(key) ON test_y82swg5w.data_r2
    2023.12.30 03:20:16.007771 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c): Trying to reserve 1.00 MiB using storage policy from min volume index 0
    2023.12.30 03:20:16.008769 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> DiskLocal: Reserved 1.00 MiB on local disk `default`, having unreserved 94.58 GiB.
    2023.12.30 03:20:16.034847 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> MergedBlockOutputStream: filled checksums all_1_1_0 (state Temporary)
    2023.12.30 03:20:16.037815 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): Wrote block with ID 'all_16201685294980115408_4608068419994166055', 1 rows
    2023.12.30 03:20:16.144259 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperWithFaultInjection call FAILED: seed=8123498043031264807 func=tryMulti path=/clickhouse/tables/test_y82swg5w/data/blocks/all_16201685294980115408_4608068419994166055 code=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.148988 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: setKeeperError: error=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.149232 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: will retry due to error: retry_count=1/100 timeout=1ms error=Operation timeout message=Fault injection after operation
    2023.12.30 03:20:16.155148 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Trace> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c): Renaming temporary part tmp_insert_all_1_1_0 to all_3_3_0 with tid (1, 1, 00000000-0000-0000-0000-000000000000).
    2023.12.30 03:20:16.161514 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> test_y82swg5w.data_r2 (fcf801a4-4edd-4209-b52a-4400eb4c4a4c) (Replicated OutputStream): ZooKeeperRetriesControl: commitPart: succeeded after: Iterations=2 Total keeper failures=1/100
    2023.12.30 03:20:16.166390 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> executeQuery: Read 1 rows, 4.00 B in 0.181492 sec., 5.509884733211382 rows/sec., 22.04 B/sec.
    2023.12.30 03:20:16.166950 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> MemoryTracker: Peak memory usage (for query): 3.13 MiB.
    2023.12.30 03:20:16.168314 [ 1956 ] {c287beae-b56e-4193-b4c2-812ca5c52919} <Debug> TCPHandler: Processed in 0.186244473 sec.

  [1]: https://s3.amazonaws.com/clickhouse-test-reports/57755/a12df35be4c6954e683dbea53c00599ca6a96d5d/stateless_tests_flaky_check__asan_.html

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 .../02943_rmt_alter_metadata_merge_checksum_mismatch.sh       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
index 20cffcd9f65..431f59d7918 100755
--- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
+++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh
@@ -34,7 +34,7 @@ function restore_failpoints()
 }
 trap restore_failpoints EXIT
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
     drop table if exists data_r1;
     drop table if exists data_r2;
 
@@ -80,7 +80,7 @@ fi
 # This will create MERGE_PARTS, on failed replica it will be fetched from source replica (since it does not have all parts to execute merge)
 $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1" # part all_0_0_1_1
 
-$CLICKHOUSE_CLIENT -nm -q "
+$CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q "
     insert into $success_replica (key) values (2); -- part all_2_2_0
     optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1
     system sync replica $failed_replica pull;

From 6e2c4f04aaa17a70c37279461be84382e3c8970d Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy <bharatnc@gmail.com>
Date: Mon, 1 Jan 2024 21:31:22 -0800
Subject: [PATCH 044/105] support hints for database engine

---
 src/Databases/DatabaseFactory.cpp | 11 +++++++++--
 src/Databases/DatabaseFactory.h   | 11 ++++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp
index 2c2e4030821..fc8073eac3b 100644
--- a/src/Databases/DatabaseFactory.cpp
+++ b/src/Databases/DatabaseFactory.cpp
@@ -92,9 +92,16 @@ void validate(const ASTCreateQuery & create_query)
 
 DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context)
 {
+    const auto engine_name = create.storage->engine->name;
     /// check if the database engine is a valid one before proceeding
-    if (!database_engines.contains(create.storage->engine->name))
-        throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", create.storage->engine->name);
+    if (!database_engines.contains(engine_name))
+    {
+        auto hints = getHints(engine_name);
+        if (!hints.empty())
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine {}. Maybe you meant: {}", engine_name, toString(hints));
+        else
+            throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Unknown database engine: {}", create.storage->engine->name);
+    }
 
     /// if the engine is found (i.e. registered with the factory instance), then validate if the
     /// supplied engine arguments, settings and table overrides are valid for the engine.
diff --git a/src/Databases/DatabaseFactory.h b/src/Databases/DatabaseFactory.h
index c86eaddb29d..6b92963f46e 100644
--- a/src/Databases/DatabaseFactory.h
+++ b/src/Databases/DatabaseFactory.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/NamePrompter.h>
 #include <Interpreters/Context_fwd.h>
 #include <Databases/IDatabase.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -24,7 +25,7 @@ static inline ValueType safeGetLiteralValue(const ASTPtr &ast, const String &eng
     return ast->as<ASTLiteral>()->value.safeGet<ValueType>();
 }
 
-class DatabaseFactory : private boost::noncopyable
+class DatabaseFactory : private boost::noncopyable, public IHints<>
 {
 public:
 
@@ -52,6 +53,14 @@ public:
 
     const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
 
+    /// Names of all registered database engines (the keys of `database_engines`), used as candidates for hints.
+    std::vector<String> getAllRegisteredNames() const override
+    {
+        std::vector<String> names;
+        std::transform(database_engines.begin(), database_engines.end(), std::back_inserter(names), [](const auto & entry) { return entry.first; });
+        return names;
+    }
+
 private:
     DatabaseEngines database_engines;
 

From b7cc6d4615c6ce47c695962747044fb1e49c099b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 13:08:04 +0000
Subject: [PATCH 045/105] Fixing tests.

---
 src/Storages/StorageFile.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index e4619d64ae3..12a8eed106e 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1517,7 +1517,16 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    size_t output_ports = pipe.numOutputPorts();
+    const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages;
+    if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams)
+        pipe.resize(max_num_streams);
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 

From 0f76967f9755d3b15eb530a1a6e2dc00e653b9d9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 13:45:41 +0000
Subject: [PATCH 046/105] Add reading step to Azure.

---
 src/Storages/StorageAzureBlob.cpp | 207 ++++++++++++++++++++++++------
 src/Storages/StorageAzureBlob.h   |  24 +++-
 2 files changed, 188 insertions(+), 43 deletions(-)

diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 9564bad485c..048248ef334 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,4 +1,6 @@
 #include <Storages/StorageAzureBlob.h>
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
 
 
 #if USE_AZURE_BLOB_STORAGE
@@ -666,7 +668,58 @@ private:
 
 }
 
-Pipe StorageAzureBlob::read(
+class ReadFromAzureBlob : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromAzureBlob"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override; /// Builds the predicate from `filter_nodes` and creates the iterator with it.
+
+    ReadFromAzureBlob(
+        Block sample_block, /// Becomes the header of the step's output DataStream.
+        std::shared_ptr<StorageAzureBlob> storage_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    std::shared_ptr<StorageAzureBlob> storage;
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+
+    ContextPtr context;
+
+    size_t max_block_size;
+    const size_t num_streams; /// Number of sources created by initializePipeline().
+
+    std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper; /// Created once by createIterator(); passed to every source.
+
+    void createIterator(const ActionsDAG::Node * predicate); /// Does nothing if `iterator_wrapper` already exists.
+};
+
+/// Combines the pushed-down filter nodes into one DAG and creates the blob iterator from its first output node.
+void ReadFromAzureBlob::applyFilters()
+{
+    const auto dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+
+    /// Without a DAG there is no predicate, and the iterator is created with a null one.
+    const ActionsDAG::Node * root = dag ? dag->getOutputs().at(0) : nullptr;
+    createIterator(root);
+}
+
+void StorageAzureBlob::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -678,51 +731,76 @@ Pipe StorageAzureBlob::read(
     if (partition_by && configuration.withWildcard())
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet");
 
-    Pipes pipes;
-
-    std::shared_ptr<StorageAzureBlobSource::IIterator> iterator_wrapper;
-    if (distributed_processing)
-    {
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(local_context,
-            local_context->getReadTaskCallback());
-    }
-    else if (configuration.withGlobs())
-    {
-        /// Iterate through disclosed globs and make a source for each file
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
-            object_storage.get(), configuration.container, configuration.blob_path,
-            query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback());
-    }
-    else
-    {
-        iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
-            object_storage.get(), configuration.container, configuration.blobs_paths,
-            query_info.query, virtual_columns, local_context, nullptr, local_context->getFileProgressCallback());
-    }
+    auto this_ptr = std::static_pointer_cast<StorageAzureBlob>(shared_from_this());
 
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
         && local_context->getSettingsRef().optimize_count_from_files;
 
+    auto reading = std::make_unique<ReadFromAzureBlob>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        std::move(read_from_format_info),
+        need_only_count,
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+/// Creates the blob iterator on the first call only; later calls keep the iterator built by the first one.
+void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper)
+        return;
+
+    if (storage->distributed_processing)
+    {
+        iterator_wrapper = std::make_shared<StorageAzureBlobSource::ReadIterator>(context, context->getReadTaskCallback());
+        return;
+    }
+
+    const auto & config = storage->configuration;
+    auto blob_storage = storage->object_storage.get();
+
+    if (config.withGlobs())
+    {
+        /// Iterate through disclosed globs and make a source for each file
+        iterator_wrapper = std::make_shared<StorageAzureBlobSource::GlobIterator>(
+            blob_storage, config.container, config.blob_path, predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback());
+        return;
+    }
+
+    iterator_wrapper = std::make_shared<StorageAzureBlobSource::KeysIterator>(
+        blob_storage, config.container, config.blobs_paths, predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback());
+}
+
+void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr);
+
+    const auto & configuration = storage->configuration;
+    Pipes pipes;
+
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageAzureBlobSource>(
-            read_from_format_info,
+            info,
             configuration.format,
             getName(),
-            local_context,
-            format_settings,
+            context,
+            storage->format_settings,
             max_block_size,
             configuration.compression_method,
-            object_storage.get(),
+            storage->object_storage.get(),
             configuration.container,
             configuration.connection_url,
             iterator_wrapper,
-            need_only_count,
-            query_info));
+            need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    pipeline.init(Pipe::unitePipes(std::move(pipes)));
 }
 
 SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
@@ -872,6 +950,55 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     recursive = blob_path_with_globs == "/**" ? true : false;
 }
 
+/// Variant of the constructor that takes the filter as an ActionsDAG predicate node and prepares `filter_dag` from it.
+StorageAzureBlobSource::GlobIterator::GlobIterator(
+    AzureObjectStorage * object_storage_,
+    const std::string & container_,
+    String blob_path_with_globs_,
+    const ActionsDAG::Node * predicate,
+    const NamesAndTypesList & virtual_columns_,
+    ContextPtr context_,
+    RelativePathsWithMetadata * outer_blobs_,
+    std::function<void(FileProgress)> file_progress_callback_)
+    : IIterator(context_)
+    , object_storage(object_storage_)
+    , container(container_)
+    , blob_path_with_globs(blob_path_with_globs_)
+    , virtual_columns(virtual_columns_)
+    , outer_blobs(outer_blobs_)
+    , file_progress_callback(file_progress_callback_)
+{
+    /// Everything before the first glob character (`*`, `?` or `{`) is a literal prefix; it is used for listing below.
+    const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
+
+    /// The path contains no glob characters, so it names a single blob and there is nothing to list.
+    if (key_prefix.size() == blob_path_with_globs.size())
+    {
+        auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs);
+        blobs_with_metadata.emplace_back(
+            blob_path_with_globs,
+            object_metadata);
+        if (outer_blobs)
+            outer_blobs->emplace_back(blobs_with_metadata.back());
+        if (file_progress_callback)
+            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
+        is_finished = true;
+        return;
+    }
+
+    object_storage_iterator = object_storage->iterate(key_prefix);
+
+    matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(blob_path_with_globs));
+    if (!matcher->ok())
+        throw Exception(
+            ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error());
+
+    recursive = blob_path_with_globs == "/**" ? true : false;
+    /// The path/file filter is built once here from the predicate; next() uses it to filter the listed batches.
+    filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+    is_initialized = true;
+}
+
 RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
 {
     std::lock_guard lock(next_mutex);
@@ -924,6 +1051,15 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
 
             VirtualColumnUtils::filterByPathOrFile(new_batch, paths, query, virtual_columns, getContext(), filter_ast);
         }
+        else if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(new_batch.size());
+            for (auto & path_with_metadata : new_batch)
+                paths.push_back(fs::path(container) / path_with_metadata.relative_path);
+
+            VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext());
+        }
 
         if (outer_blobs)
             outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end());
@@ -948,7 +1084,7 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
     AzureObjectStorage * object_storage_,
     const std::string & container_,
     const Strings & keys_,
-    ASTPtr query_,
+    const ActionsDAG::Node * predicate,
     const NamesAndTypesList & virtual_columns_,
     ContextPtr context_,
     RelativePathsWithMetadata * outer_blobs,
@@ -956,23 +1092,22 @@ StorageAzureBlobSource::KeysIterator::KeysIterator(
     : IIterator(context_)
     , object_storage(object_storage_)
     , container(container_)
-    , query(query_)
     , virtual_columns(virtual_columns_)
 {
     Strings all_keys = keys_;
 
     ASTPtr filter_ast;
     if (!all_keys.empty())
-        filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / all_keys[0], getContext());
+        filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
 
-    if (filter_ast)
+    if (filter_dag)
     {
         Strings paths;
         paths.reserve(all_keys.size());
         for (const auto & key : all_keys)
             paths.push_back(fs::path(container) / key);
 
-        VirtualColumnUtils::filterByPathOrFile(all_keys, paths, query, virtual_columns, getContext(), filter_ast);
+        VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext());
     }
 
     for (auto && key : all_keys)
@@ -1078,8 +1213,7 @@ StorageAzureBlobSource::StorageAzureBlobSource(
     const String & container_,
     const String & connection_url_,
     std::shared_ptr<IIterator> file_iterator_,
-    bool need_only_count_,
-    const SelectQueryInfo & query_info_)
+    bool need_only_count_)
     :ISource(info.source_header, false)
     , WithContext(context_)
     , requested_columns(info.requested_columns)
@@ -1096,7 +1230,6 @@ StorageAzureBlobSource::StorageAzureBlobSource(
     , connection_url(connection_url_)
     , file_iterator(file_iterator_)
     , need_only_count(need_only_count_)
-    , query_info(query_info_)
     , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1)
     , create_reader_scheduler(threadPoolCallbackRunner<ReaderHolder>(create_reader_pool, "AzureReader"))
 {
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index bf4f6f37efe..30b91b7f85a 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -88,7 +88,8 @@ public:
         return name;
     }
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names &,
         const StorageSnapshotPtr &,
         SelectQueryInfo &,
@@ -126,6 +127,8 @@ public:
         bool distributed_processing = false);
 
 private:
+    friend class ReadFromAzureBlob;
+
     std::string name;
     Configuration configuration;
     std::unique_ptr<AzureObjectStorage> object_storage;
@@ -162,6 +165,16 @@ public:
             RelativePathsWithMetadata * outer_blobs_,
             std::function<void(FileProgress)> file_progress_callback_ = {});
 
+        GlobIterator(
+            AzureObjectStorage * object_storage_,
+            const std::string & container_,
+            String blob_path_with_globs_,
+            const ActionsDAG::Node * predicate,
+            const NamesAndTypesList & virtual_columns_,
+            ContextPtr context_,
+            RelativePathsWithMetadata * outer_blobs_,
+            std::function<void(FileProgress)> file_progress_callback_ = {});
+
         RelativePathWithMetadata next() override;
         ~GlobIterator() override = default;
 
@@ -171,6 +184,7 @@ public:
         String blob_path_with_globs;
         ASTPtr query;
         ASTPtr filter_ast;
+        ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
         size_t index = 0;
@@ -212,7 +226,7 @@ public:
             AzureObjectStorage * object_storage_,
             const std::string & container_,
             const Strings & keys_,
-            ASTPtr query_,
+            const ActionsDAG::Node * predicate,
             const NamesAndTypesList & virtual_columns_,
             ContextPtr context_,
             RelativePathsWithMetadata * outer_blobs,
@@ -226,7 +240,7 @@ public:
         std::string container;
         RelativePathsWithMetadata keys;
 
-        ASTPtr query;
+        ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
         std::atomic<size_t> index = 0;
@@ -244,8 +258,7 @@ public:
         const String & container_,
         const String & connection_url_,
         std::shared_ptr<IIterator> file_iterator_,
-        bool need_only_count_,
-        const SelectQueryInfo & query_info_);
+        bool need_only_count_);
     ~StorageAzureBlobSource() override;
 
     Chunk generate() override;
@@ -271,7 +284,6 @@ private:
     std::shared_ptr<IIterator> file_iterator;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    SelectQueryInfo query_info;
 
     struct ReaderHolder
     {

From d3d5976d3e93a9fa7f14462ce84a1136e3437fee Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Tue, 2 Jan 2024 15:13:25 +0100
Subject: [PATCH 047/105] fix

---
 src/Interpreters/executeDDLQueryOnCluster.cpp               | 3 ++-
 .../0_stateless/02447_drop_database_replica.reference       | 6 ++++--
 tests/queries/0_stateless/02447_drop_database_replica.sh    | 6 ++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp
index ba7638cd83f..6b6054fdae3 100644
--- a/src/Interpreters/executeDDLQueryOnCluster.cpp
+++ b/src/Interpreters/executeDDLQueryOnCluster.cpp
@@ -312,7 +312,8 @@ DDLQueryStatusSource::DDLQueryStatusSource(
     , log(&Poco::Logger::get("DDLQueryStatusSource"))
 {
     auto output_mode = context->getSettingsRef().distributed_ddl_output_mode;
-    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::NONE;
+    throw_on_timeout = output_mode == DistributedDDLOutputMode::THROW || output_mode == DistributedDDLOutputMode::THROW_ONLY_ACTIVE
+        || output_mode == DistributedDDLOutputMode::NONE;
 
     if (hosts_to_wait)
     {
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index 8ad9008057f..7be5dde1998 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -13,11 +13,12 @@ t
 rdb_default	1	1	s1	r1	1
 2
 s1	r1	OK	2	0
-s2	r1	QUEUED	2	0
 s1	r2	QUEUED	2	0
+s2	r1	QUEUED	2	0
+2
 s1	r1	OK	2	0
-s2	r1	QUEUED	2	0
 s1	r2	QUEUED	2	0
+s2	r1	QUEUED	2	0
 2
 rdb_default	1	1	s1	r1	1
 rdb_default	1	2	s1	r2	0
@@ -26,4 +27,5 @@ rdb_default	1	2	s1	r2	0
 t
 t2
 t3
+t4
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index 388af3fad74..d12f173f388 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -32,8 +32,10 @@ $CLICKHOUSE_CLIENT -q "system sync database replica $db"
 $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1"
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
-$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log"
-$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log"
+# Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>/dev/null | sort
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
 
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"

From 3e3fed1cbe2b6b67c02a852164653a8b241c672a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 15:18:13 +0000
Subject: [PATCH 048/105] Add reading step to URL

---
 src/Storages/HDFS/StorageHDFS.cpp       |  14 +-
 src/Storages/S3Queue/StorageS3Queue.cpp |  19 +-
 src/Storages/StorageAzureBlob.cpp       |  10 +-
 src/Storages/StorageFile.cpp            |   5 +
 src/Storages/StorageS3.cpp              |  10 +-
 src/Storages/StorageURL.cpp             | 285 +++++++++++++++++++-----
 src/Storages/StorageURL.h               |  10 +-
 src/Storages/StorageXDBC.cpp            |   5 +-
 src/Storages/StorageXDBC.h              |   3 +-
 9 files changed, 280 insertions(+), 81 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index fe37b2eb57a..c7cbaa1e561 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -1,3 +1,4 @@
+#include "Processors/Sources/NullSource.h"
 #include "config.h"
 
 #if USE_HDFS
@@ -1014,10 +1015,17 @@ void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
             context,
             max_block_size,
             iterator_wrapper,
-            need_only_count)); //,
-            //query_info));
+            need_only_count));
     }
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/)
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 1a6666c00d0..6d078e1aa1b 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,5 +1,6 @@
 #include "Processors/QueryPlan/QueryPlan.h"
 #include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
 #include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
@@ -218,16 +219,12 @@ public:
         Block sample_block,
         ReadFromFormatInfo info_,
         std::shared_ptr<StorageS3Queue> storage_,
-        // StorageSnapshotPtr storage_snapshot_,
-        // Names column_names_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , info(std::move(info_))
         , storage(std::move(storage_))
-        // , storage_snapshot(std::move(storage_snapshot_))
-        // , column_names(std::move(column_names_))
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -237,8 +234,6 @@ public:
 private:
     ReadFromFormatInfo info;
     std::shared_ptr<StorageS3Queue> storage;
-    // StorageSnapshotPtr storage_snapshot;
-    // Names column_names;
     ContextPtr context;
     size_t max_block_size;
     size_t num_streams;
@@ -296,8 +291,6 @@ void StorageS3Queue::read(
         read_from_format_info.source_header,
         read_from_format_info,
         std::move(this_ptr),
-        // storage_snapshot,
-        // column_names,
         local_context,
         max_block_size,
         num_streams);
@@ -313,7 +306,15 @@ void ReadFromS3Queue::initializePipeline(QueryPipelineBuilder & pipeline, const
     createIterator(nullptr);
     for (size_t i = 0; i < adjusted_num_streams; ++i)
         pipes.emplace_back(storage->createSource(info, iterator, max_block_size, context));
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 std::shared_ptr<StorageS3QueueSource> StorageS3Queue::createSource(
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 048248ef334..defff830411 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,6 +1,7 @@
 #include <Storages/StorageAzureBlob.h>
 #include "Processors/QueryPlan/QueryPlan.h"
 #include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
 
 
 #if USE_AZURE_BLOB_STORAGE
@@ -800,7 +801,14 @@ void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, cons
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 12a8eed106e..18acbfc7153 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1477,6 +1477,8 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
 
 void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
+    createIterator(nullptr);
+
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
@@ -1523,6 +1525,9 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < max_num_streams)
         pipe.resize(max_num_streams);
 
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
 
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 780a2755bcf..375a367bfab 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1,3 +1,4 @@
+#include "Processors/Sources/NullSource.h"
 #include "config.h"
 #include <Common/ProfileEvents.h>
 #include "Parsers/ASTCreateQuery.h"
@@ -1352,7 +1353,14 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             need_only_count));
     }
 
-    pipeline.init(Pipe::unitePipes(std::move(pipes)));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(read_from_format_info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index d6b6f5af61c..3f88966e3d3 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -34,6 +34,8 @@
 #include <Common/ProfileEvents.h>
 #include <Common/thread_local_rng.h>
 #include <Common/logger_useful.h>
+#include "Processors/QueryPlan/QueryPlan.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
 #include <IO/ReadWriteBufferFromHTTP.h>
 #include <IO/HTTPHeaderEntries.h>
 
@@ -201,6 +203,25 @@ public:
         }
     }
 
+    /// Expands `uri_` with parseRemoteDescription() and, if a path/file filter DAG can be built from `predicate`, applies it to the URIs.
+    Impl(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    {
+        uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
+        ActionsDAGPtr filter_dag;
+        if (!uris.empty())
+            filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(uris.size());
+            for (const auto & uri : uris)
+                paths.push_back(Poco::URI(uri).getPath());
+            /// The filter is given only the path component of each URI; presumably it drops non-matching entries from `uris` - verify.
+            VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context);
+        }
+    }
+
     String next()
     {
         size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
@@ -223,6 +244,9 @@ private:
 StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, query, virtual_columns, context)) {}
 
+StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
+    : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, predicate, virtual_columns, context)) {}
+
 String StorageURLSource::DisclosedGlobIterator::next()
 {
     return pimpl->next();
@@ -260,7 +284,6 @@ StorageURLSource::StorageURLSource(
     const ConnectionTimeouts & timeouts,
     CompressionMethod compression_method,
     size_t max_parsing_threads,
-    const SelectQueryInfo &,
     const HTTPHeaderEntries & headers_,
     const URIParams & params,
     bool glob_url,
@@ -874,7 +897,86 @@ bool IStorageURLBase::parallelizeOutputAfterReading(ContextPtr context) const
     return FormatFactory::instance().checkParallelizeOutputAfterReading(format_name, context);
 }
 
-Pipe IStorageURLBase::read(
+/// Query plan step that reads from a URL storage.
+/// The URI iterator is created by createIterator(), which is called either from applyFilters() (with a predicate
+/// built from the step's filter nodes) or from initializePipeline() (with a null predicate).
+class ReadFromURL : public SourceStepWithFilter
+{
+public:
+    std::string getName() const override { return "ReadFromURL"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    /// `uri_options_` may be nullptr: IStorageURLBase::read() passes nullptr, StorageURLWithFailover::read() passes
+    /// the address of its own `uri_options`. The pointer is stored as is and is not owned by the step.
+    ReadFromURL(
+        Block sample_block,
+        std::shared_ptr<StorageURL> storage_,
+        std::vector<String> * uri_options_,
+        ReadFromFormatInfo info_,
+        const bool need_only_count_,
+        std::vector<std::pair<std::string, std::string>> read_uri_params_,
+        std::function<void(std::ostream &)> read_post_data_callback_,
+        ContextPtr context_,
+        size_t max_block_size_,
+        size_t num_streams_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , uri_options(uri_options_)
+        , info(std::move(info_))
+        , need_only_count(need_only_count_)
+        , read_uri_params(std::move(read_uri_params_))
+        , read_post_data_callback(std::move(read_post_data_callback_))
+        , context(std::move(context_))
+        , max_block_size(max_block_size_)
+        , num_streams(num_streams_)
+    {
+    }
+
+private:
+    /// NOTE(review): both read() callers obtain this via static_pointer_cast<StorageURL>(shared_from_this());
+    /// confirm that every IStorageURLBase subclass that reaches this step really is a StorageURL.
+    std::shared_ptr<StorageURL> storage;
+
+    /// Failover URI list, not owned. When set, createIterator() serves it as a single set of options
+    /// and initializePipeline() shuffles it in place.
+    std::vector<String> * uri_options;
+
+    /// Computed once in the storage's read() and passed to every StorageURLSource created by initializePipeline().
+    ReadFromFormatInfo info;
+    const bool need_only_count;
+    std::vector<std::pair<std::string, std::string>> read_uri_params;
+    std::function<void(std::ostream &)> read_post_data_callback;
+
+    ContextPtr context;
+
+    size_t max_block_size;
+    /// Not const: createIterator() may reset it to 1.
+    size_t num_streams;
+
+    /// NOTE(review): the failover lambda stored here captures this step by reference ([&]) and is handed to the
+    /// sources; confirm the step outlives every source that may call it.
+    std::shared_ptr<StorageURLSource::IteratorWrapper> iterator_wrapper;
+    bool is_url_with_globs = false;
+    /// Set when the glob iterator reports zero URIs; initializePipeline() then emits a NullSource.
+    bool is_empty_glob = false;
+
+    /// Does nothing if the iterator already exists or the glob was already found to be empty.
+    void createIterator(const ActionsDAG::Node * predicate);
+};
+
+/// Builds a filter DAG from the step's filter nodes and creates the URI iterator with its first output (nullptr if no DAG).
+void ReadFromURL::applyFilters()
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+    createIterator(predicate);
+}
+
+void IStorageURLBase::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -884,16 +986,61 @@ Pipe IStorageURLBase::read(
     size_t num_streams)
 {
     auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
-
-    std::shared_ptr<StorageURLSource::IteratorWrapper> iterator_wrapper{nullptr};
-    bool is_url_with_globs = urlWithGlobs(uri);
-    size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements;
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    if (distributed_processing)
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
+
+    auto read_post_data_callback = getReadPOSTDataCallback(
+        read_from_format_info.columns_description.getNamesOfPhysical(),
+        read_from_format_info.columns_description,
+        query_info,
+        local_context,
+        processed_stage,
+        max_block_size);
+
+    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromURL>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        nullptr,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(params),
+        std::move(read_post_data_callback),
+        local_context,
+        max_block_size,
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromURL::createIterator(const ActionsDAG::Node * predicate)
+{
+    if (iterator_wrapper || is_empty_glob)
+        return;
+
+    if (uri_options)
+    {
+        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, done = false]() mutable
+        {
+            if (done)
+                return StorageURLSource::FailoverOptions{};
+            done = true;
+            return *uri_options;
+        });
+
+        return;
+    }
+
+    size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;
+    is_url_with_globs = urlWithGlobs(storage->uri);
+
+    if (storage->distributed_processing)
     {
         iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>(
-            [callback = local_context->getReadTaskCallback(), max_addresses]()
+            [callback = context->getReadTaskCallback(), max_addresses]()
             {
                 String next_uri = callback();
                 if (next_uri.empty())
@@ -904,11 +1051,14 @@ Pipe IStorageURLBase::read(
     else if (is_url_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, max_addresses, query_info.query, virtual_columns, local_context);
+        auto glob_iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(storage->uri, max_addresses, predicate, storage->virtual_columns, context);
 
         /// check if we filtered out all the paths
         if (glob_iterator->size() == 0)
-            return Pipe(std::make_shared<NullSource>(read_from_format_info.source_header));
+        {
+            is_empty_glob = true;
+            return;
+        }
 
         iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([glob_iterator, max_addresses]()
         {
@@ -928,53 +1078,70 @@ Pipe IStorageURLBase::read(
             if (done)
                 return StorageURLSource::FailoverOptions{};
             done = true;
-            return getFailoverOptions(uri, max_addresses);
+            return getFailoverOptions(storage->uri, max_addresses);
         });
         num_streams = 1;
     }
+}
 
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && local_context->getSettingsRef().optimize_count_from_files;
+void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createIterator(nullptr);
+
+    if (is_empty_glob)
+    {
+        pipeline.init(Pipe(std::make_shared<NullSource>(info.source_header)));
+        return;
+    }
 
     Pipes pipes;
     pipes.reserve(num_streams);
 
-    const size_t max_threads = local_context->getSettingsRef().max_threads;
+    const size_t max_threads = context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
 
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageURLSource>(
-            read_from_format_info,
+            info,
             iterator_wrapper,
-            getReadMethod(),
-            getReadPOSTDataCallback(
-                read_from_format_info.columns_description.getNamesOfPhysical(),
-                read_from_format_info.columns_description,
-                query_info,
-                local_context,
-                processed_stage,
-                max_block_size),
-            format_name,
-            format_settings,
-            getName(),
-            local_context,
+            storage->getReadMethod(),
+            read_post_data_callback,
+            storage->format_name,
+            storage->format_settings,
+            storage->getName(),
+            context,
             max_block_size,
-            getHTTPTimeouts(local_context),
-            compression_method,
+            getHTTPTimeouts(context),
+            storage->compression_method,
             max_parsing_threads,
-            query_info,
-            headers,
-            params,
+            storage->headers,
+            read_uri_params,
             is_url_with_globs,
             need_only_count));
     }
 
-    return Pipe::unitePipes(std::move(pipes));
+    if (uri_options)
+        std::shuffle(uri_options->begin(), uri_options->end(), thread_local_rng);
+
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    size_t output_ports = pipe.numOutputPorts();
+    const bool parallelize_output = context->getSettingsRef().parallelize_output_from_storages;
+    if (parallelize_output && storage->parallelizeOutputAfterReading(context) && output_ports > 0 && output_ports < num_streams)
+        pipe.resize(num_streams);
+
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(info.source_header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 
-Pipe StorageURLWithFailover::read(
+void StorageURLWithFailover::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -984,38 +1151,34 @@ Pipe StorageURLWithFailover::read(
     size_t num_streams)
 {
     auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size);
-
-    auto iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, done = false]() mutable
-    {
-        if (done)
-            return StorageURLSource::FailoverOptions{};
-        done = true;
-        return uri_options;
-    });
-
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
 
-    const size_t max_threads = local_context->getSettingsRef().max_threads;
-    const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
 
-    auto pipe = Pipe(std::make_shared<StorageURLSource>(
-        read_from_format_info,
-        iterator_wrapper,
-        getReadMethod(),
-        getReadPOSTDataCallback(read_from_format_info.columns_description.getNamesOfPhysical(), read_from_format_info.columns_description, query_info, local_context, processed_stage, max_block_size),
-        format_name,
-        format_settings,
-        getName(),
+    auto read_post_data_callback = getReadPOSTDataCallback(
+        read_from_format_info.columns_description.getNamesOfPhysical(),
+        read_from_format_info.columns_description,
+        query_info,
+        local_context,
+        processed_stage,
+        max_block_size);
+
+    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromURL>(
+        read_from_format_info.source_header,
+        std::move(this_ptr),
+        &uri_options,
+        std::move(read_from_format_info),
+        need_only_count,
+        std::move(params),
+        std::move(read_post_data_callback),
         local_context,
         max_block_size,
-        getHTTPTimeouts(local_context),
-        compression_method,
-        max_parsing_threads,
-        query_info,
-        headers,
-        params));
-    std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng);
-    return pipe;
+        num_streams);
+
+    query_plan.addStep(std::move(reading));
 }
 
 
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 8d027025882..1b2fb97cb28 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -34,7 +34,8 @@ class PullingPipelineExecutor;
 class IStorageURLBase : public IStorage
 {
 public:
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -67,6 +68,8 @@ public:
         const ContextPtr & context);
 
 protected:
+    friend class ReadFromURL;
+
     IStorageURLBase(
         const String & uri_,
         ContextPtr context_,
@@ -137,6 +140,7 @@ public:
     {
     public:
         DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
+        DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 
         String next();
         size_t size();
@@ -162,7 +166,6 @@ public:
         const ConnectionTimeouts & timeouts,
         CompressionMethod compression_method,
         size_t max_parsing_threads,
-        const SelectQueryInfo & query_info,
         const HTTPHeaderEntries & headers_ = {},
         const URIParams & params = {},
         bool glob_url = false,
@@ -317,7 +320,8 @@ public:
         ContextPtr context_,
         const String & compression_method_);
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp
index a569c50835c..a274b1ba4db 100644
--- a/src/Storages/StorageXDBC.cpp
+++ b/src/Storages/StorageXDBC.cpp
@@ -102,7 +102,8 @@ std::function<void(std::ostream &)> StorageXDBC::getReadPOSTDataCallback(
     return write_body_callback;
 }
 
-Pipe StorageXDBC::read(
+void StorageXDBC::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -114,7 +115,7 @@ Pipe StorageXDBC::read(
     storage_snapshot->check(column_names);
 
     bridge_helper->startBridgeSync();
-    return IStorageURLBase::read(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
+    IStorageURLBase::read(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
 }
 
 SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h
index 1c1651cb333..fe678785dc2 100644
--- a/src/Storages/StorageXDBC.h
+++ b/src/Storages/StorageXDBC.h
@@ -19,7 +19,8 @@ namespace DB
 class StorageXDBC : public IStorageURLBase
 {
 public:
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,

From f2dfe8bddabb05194d0c380df13e8ae836fc24fa Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 2 Jan 2024 16:42:17 +0100
Subject: [PATCH 049/105] Fix build

---
 src/Storages/StorageMaterializedView.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index 458e0c9ab6b..59f1d5eee1b 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -72,7 +72,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
-    const StorageID & getTargetTableId() const { return target_table_id; }
+    StorageID getTargetTableId() const { return target_table_id; }
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;
@@ -119,7 +119,6 @@ private:
     std::tuple<ContextMutablePtr, std::shared_ptr<ASTInsertQuery>> prepareRefresh() const;
     StorageID exchangeTargetTable(StorageID fresh_table, ContextPtr refresh_context);
 
-    StorageID getTargetTableId() const;
     void setTargetTableId(StorageID id);
     void updateTargetTableId(std::optional<String> database_name, std::optional<String> table_name);
 };

From 8936c8376a05030b5559364cd65ef4db5ab7af87 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:14:16 +0000
Subject: [PATCH 050/105] Use predicate in getTaskIteratorExtension.

---
 src/Storages/HDFS/StorageHDFSCluster.cpp |   4 +-
 src/Storages/HDFS/StorageHDFSCluster.h   |   2 +-
 src/Storages/IStorageCluster.cpp         | 110 ++++++++++++++++++++---
 src/Storages/IStorageCluster.h           |   7 +-
 src/Storages/StorageAzureBlobCluster.cpp |   4 +-
 src/Storages/StorageAzureBlobCluster.h   |   2 +-
 src/Storages/StorageDistributed.cpp      |  61 ++++++++++++-
 src/Storages/StorageFileCluster.cpp      |   4 +-
 src/Storages/StorageFileCluster.h        |   2 +-
 src/Storages/StorageS3Cluster.cpp        |   4 +-
 src/Storages/StorageS3Cluster.h          |   2 +-
 src/Storages/StorageURL.cpp              |   6 +-
 src/Storages/StorageURLCluster.cpp       |   4 +-
 src/Storages/StorageURLCluster.h         |   2 +-
 14 files changed, 174 insertions(+), 40 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp
index bff22936e95..2e8129b9845 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.cpp
+++ b/src/Storages/HDFS/StorageHDFSCluster.cpp
@@ -79,9 +79,9 @@ void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String
 }
 
 
-RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, query, virtual_columns, context);
+    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, virtual_columns, context);
     auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h
index 8ad4a83c5b9..7c4c41a573a 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.h
+++ b/src/Storages/HDFS/StorageHDFSCluster.h
@@ -35,7 +35,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index 1447dad1374..c59b74255b2 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -2,6 +2,9 @@
 
 #include "Common/Exception.h"
 #include "Core/QueryProcessingStage.h"
+#include "Processors/QueryPlan/SourceStepWithFilter.h"
+#include "Processors/Sources/NullSource.h"
+#include "QueryPipeline/QueryPipelineBuilder.h"
 #include <DataTypes/DataTypeString.h>
 #include <IO/ConnectionTimeouts.h>
 #include <Interpreters/Context.h>
@@ -38,9 +41,66 @@ IStorageCluster::IStorageCluster(
 {
 }
 
+class ReadFromCluster : public SourceStepWithFilter /// Query plan step that IStorageCluster::read() adds to the plan
+{
+public:
+    std::string getName() const override { return "ReadFromCluster"; }
+    void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
+    void applyFilters() override;
+
+    ReadFromCluster(
+        Block sample_block, /// Becomes the header of the step's output DataStream
+        std::shared_ptr<IStorageCluster> storage_,
+        ASTPtr query_to_send_,
+        QueryProcessingStage::Enum processed_stage_,
+        ClusterPtr cluster_,
+        Poco::Logger * log_,
+        ContextPtr context_)
+        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
+        , storage(std::move(storage_))
+        , query_to_send(std::move(query_to_send_))
+        , processed_stage(processed_stage_)
+        , cluster(std::move(cluster_))
+        , log(log_)
+        , context(std::move(context_))
+    {
+    }
+
+private:
+    std::shared_ptr<IStorageCluster> storage; /// Used to call getTaskIteratorExtension()
+    ASTPtr query_to_send; /// Converted with queryToString() for RemoteQueryExecutor in initializePipeline()
+    QueryProcessingStage::Enum processed_stage; /// WithMergeableState enables add_agg_info in initializePipeline()
+    ClusterPtr cluster; /// Its shards are iterated in initializePipeline()
+    Poco::Logger * log;
+    ContextPtr context;
+
+    std::optional<RemoteQueryExecutor::Extension> extension; /// Empty until createExtension() has run
+
+    void createExtension(const ActionsDAG::Node * predicate); /// Fills `extension` once; predicate may be nullptr
+    ContextPtr updateSettings(const Settings & settings);
+};
+
+void ReadFromCluster::applyFilters() /// Turns the collected filter nodes into a predicate and creates the extension with it
+{
+    auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes, {}, context);
+    const ActionsDAG::Node * predicate = nullptr; /// Stays nullptr when no filter DAG was built
+    if (filter_actions_dag)
+        predicate = filter_actions_dag->getOutputs().at(0);
+
+    createExtension(predicate); /// NOTE(review): predicate points into the local filter_actions_dag; assumes getTaskIteratorExtension() does not keep the pointer - confirm
+}
+
+void ReadFromCluster::createExtension(const ActionsDAG::Node * predicate) /// Initializes `extension` on the first call only
+{
+    if (extension)
+        return; /// Already created by an earlier call; the new predicate is ignored
+
+    extension = storage->getTaskIteratorExtension(predicate, context);
+}
 
 /// The code executes on initiator
-Pipe IStorageCluster::read(
+void IStorageCluster::read(
+    QueryPlan & query_plan,
     const Names & column_names,
     const StorageSnapshotPtr & storage_snapshot,
     SelectQueryInfo & query_info,
@@ -49,10 +109,10 @@ Pipe IStorageCluster::read(
     size_t /*max_block_size*/,
     size_t /*num_streams*/)
 {
-    updateBeforeRead(context);
+    storage_snapshot->check(column_names);
 
+    updateBeforeRead(context);
     auto cluster = getCluster(context);
-    auto extension = getTaskIteratorExtension(query_info.query, context);
 
     /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*)
 
@@ -70,12 +130,6 @@ Pipe IStorageCluster::read(
         query_to_send = interpreter.getQueryInfo().query->clone();
     }
 
-    const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
-
-    Pipes pipes;
-
-    const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
-
     if (!structure_argument_was_provided)
         addColumnsStructureToQuery(query_to_send, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), context);
 
@@ -89,7 +143,29 @@ Pipe IStorageCluster::read(
                                       /* only_replace_in_join_= */true);
     visitor.visit(query_to_send);
 
-    auto new_context = updateSettings(context, context->getSettingsRef());
+    auto this_ptr = std::static_pointer_cast<IStorageCluster>(shared_from_this());
+
+    auto reading = std::make_unique<ReadFromCluster>(
+        sample_block,
+        std::move(this_ptr),
+        std::move(query_to_send),
+        processed_stage,
+        cluster,
+        log,
+        context);
+
+    query_plan.addStep(std::move(reading));
+}
+
+void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
+{
+    createExtension(nullptr);
+
+    const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
+    const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
+
+    Pipes pipes;
+    auto new_context = updateSettings(context->getSettingsRef());
     const auto & current_settings = new_context->getSettingsRef();
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
     for (const auto & shard_info : cluster->getShardsInfo())
@@ -100,7 +176,7 @@ Pipe IStorageCluster::read(
             auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                 std::vector<IConnectionPool::Entry>{try_result},
                 queryToString(query_to_send),
-                sample_block,
+                getOutputStream().header,
                 new_context,
                 /*throttler=*/nullptr,
                 scalars,
@@ -113,8 +189,14 @@ Pipe IStorageCluster::read(
         }
     }
 
-    storage_snapshot->check(column_names);
-    return Pipe::unitePipes(std::move(pipes));
+    auto pipe = Pipe::unitePipes(std::move(pipes));
+    if (pipe.empty())
+        pipe = Pipe(std::make_shared<NullSource>(getOutputStream().header));
+
+    for (const auto & processor : pipe.getProcessors())
+        processors.emplace_back(processor);
+
+    pipeline.init(std::move(pipe));
 }
 
 QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
@@ -129,7 +211,7 @@ QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage(
     return QueryProcessingStage::Enum::FetchColumns;
 }
 
-ContextPtr IStorageCluster::updateSettings(ContextPtr context, const Settings & settings)
+ContextPtr ReadFromCluster::updateSettings(const Settings & settings)
 {
     Settings new_settings = settings;
 
diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h
index b15ed37202a..b233f20103d 100644
--- a/src/Storages/IStorageCluster.h
+++ b/src/Storages/IStorageCluster.h
@@ -22,7 +22,8 @@ public:
         Poco::Logger * log_,
         bool structure_argument_was_provided_);
 
-    Pipe read(
+    void read(
+        QueryPlan & query_plan,
         const Names & column_names,
         const StorageSnapshotPtr & storage_snapshot,
         SelectQueryInfo & query_info,
@@ -33,7 +34,7 @@ public:
 
     ClusterPtr getCluster(ContextPtr context) const;
     /// Query is needed for pruning by virtual columns (_file, _path)
-    virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const = 0;
+    virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const = 0;
 
     QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override;
 
@@ -45,8 +46,6 @@ protected:
     virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0;
 
 private:
-    ContextPtr updateSettings(ContextPtr context, const Settings & settings);
-
     Poco::Logger * log;
     String cluster_name;
     bool structure_argument_was_provided;
diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp
index b8f95458379..a6372577fb0 100644
--- a/src/Storages/StorageAzureBlobCluster.cpp
+++ b/src/Storages/StorageAzureBlobCluster.cpp
@@ -69,11 +69,11 @@ void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const S
     TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
     auto iterator = std::make_shared<StorageAzureBlobSource::GlobIterator>(
         object_storage.get(), configuration.container, configuration.blob_path,
-        query, virtual_columns, context, nullptr);
+        predicate, virtual_columns, context, nullptr);
     auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String{ return iterator->next().relative_path; });
     return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) };
 }
diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h
index 2900243708c..2831b94f825 100644
--- a/src/Storages/StorageAzureBlobCluster.h
+++ b/src/Storages/StorageAzureBlobCluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index a928a4daf63..c914388e55e 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -30,6 +30,7 @@
 #include <Common/randomSeed.h>
 #include <Common/formatReadable.h>
 #include <Common/CurrentMetrics.h>
+#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h"
 
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
@@ -1068,15 +1069,75 @@ std::optional<QueryPipeline> StorageDistributed::distributedWriteBetweenDistribu
     return pipeline;
 }
 
+/// Builds the query plan for `ast` in analyze-only mode, optimizes it and walks every step of the plan
+/// looking for the single SourceStepWithFilter. Returns the result of ActionsDAG::buildFilterActionsDAG()
+/// for the filter nodes of that step, or nullptr if the plan has no such step.
+/// Throws LOGICAL_ERROR if the plan contains more than one SourceStepWithFilter.
+static ActionsDAGPtr getFilterFromQuery(const ASTPtr & ast, ContextPtr context)
+{
+    QueryPlan plan;
+    SelectQueryOptions options;
+    options.only_analyze = true;
+    if (context->getSettingsRef().allow_experimental_analyzer)
+    {
+        InterpreterSelectQueryAnalyzer interpreter(ast, context, options);
+        plan = std::move(interpreter).extractQueryPlan();
+    }
+    else
+    {
+        InterpreterSelectWithUnionQuery interpreter(ast, context, options);
+        interpreter.buildQueryPlan(plan);
+    }
+
+    plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
+
+    std::stack<QueryPlan::Node *> nodes;
+    nodes.push(plan.getRootNode());
+
+    SourceStepWithFilter * source = nullptr;
+
+    while (!nodes.empty())
+    {
+        const auto * node = nodes.top();
+        nodes.pop();
+
+        if (auto * with_filter = dynamic_cast<SourceStepWithFilter *>(node->step.get()))
+        {
+            if (source)
+            {
+                WriteBufferFromOwnString buf;
+                plan.explainPlan(buf, {});
+                throw Exception(ErrorCodes::LOGICAL_ERROR,
+                    "Found multiple source steps for query\n{}\nPlan\n{}",
+                    queryToString(ast), buf.str());
+            }
+
+            source = with_filter;
+        }
+
+        /// Descend into the children so that every step of the plan is examined, not only the root.
+        for (auto * child : node->children)
+            nodes.push(child);
+    }
+
+    if (!source)
+        return nullptr;
+
+    return ActionsDAG::buildFilterActionsDAG(source->getFilterNodes().nodes, {}, context);
+}
+
 
 std::optional<QueryPipeline> StorageDistributed::distributedWriteFromClusterStorage(const IStorageCluster & src_storage_cluster, const ASTInsertQuery & query, ContextPtr local_context) const
 {
     const auto & settings = local_context->getSettingsRef();
-    auto & select = query.select->as<ASTSelectWithUnionQuery &>();
+
+    auto filter = getFilterFromQuery(query.select, local_context);
+    const ActionsDAG::Node * predicate = nullptr;
+    if (filter)
+        predicate = filter->getOutputs().at(0);
+
     /// Select query is needed for pruining on virtual columns
-    auto extension = src_storage_cluster.getTaskIteratorExtension(
-        select.list_of_selects->children.at(0)->as<ASTSelectQuery>()->clone(),
-        local_context);
+    auto extension = src_storage_cluster.getTaskIteratorExtension(predicate, local_context);
 
     auto dst_cluster = getCluster();
 
diff --git a/src/Storages/StorageFileCluster.cpp b/src/Storages/StorageFileCluster.cpp
index 782c36c9819..c12124f1e07 100644
--- a/src/Storages/StorageFileCluster.cpp
+++ b/src/Storages/StorageFileCluster.cpp
@@ -71,9 +71,9 @@ void StorageFileCluster::addColumnsStructureToQuery(ASTPtr & query, const String
     TableFunctionFileCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageFileCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, std::nullopt, query, virtual_columns, context);
+    auto iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, std::nullopt, predicate, virtual_columns, context);
     auto callback = std::make_shared<TaskIterator>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h
index e907fbad0de..a6e57c3bb4f 100644
--- a/src/Storages/StorageFileCluster.h
+++ b/src/Storages/StorageFileCluster.h
@@ -31,7 +31,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override { return virtual_columns; }
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp
index 702b1f14ae7..e1738056e9d 100644
--- a/src/Storages/StorageS3Cluster.cpp
+++ b/src/Storages/StorageS3Cluster.cpp
@@ -78,10 +78,10 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context)
     s3_configuration.update(local_context);
 }
 
-RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
     auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
-        *s3_configuration.client, s3_configuration.url, query, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
+        *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback());
 
     auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String
     {
diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h
index 81fb48d2398..c526f14834a 100644
--- a/src/Storages/StorageS3Cluster.h
+++ b/src/Storages/StorageS3Cluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override;
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 3f88966e3d3..ac17f880738 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -906,7 +906,7 @@ public:
 
     ReadFromURL(
         Block sample_block,
-        std::shared_ptr<StorageURL> storage_,
+        std::shared_ptr<IStorageURLBase> storage_,
         std::vector<String> * uri_options_,
         ReadFromFormatInfo info_,
         const bool need_only_count_,
@@ -934,7 +934,7 @@ public:
     }
 
 private:
-    std::shared_ptr<StorageURL> storage;
+    std::shared_ptr<IStorageURLBase> storage;
     std::vector<String> * uri_options;
 
     // std::vector<std::string> paths;
@@ -999,7 +999,7 @@ void IStorageURLBase::read(
         processed_stage,
         max_block_size);
 
-    auto this_ptr = std::static_pointer_cast<StorageURL>(shared_from_this());
+    auto this_ptr = std::static_pointer_cast<IStorageURLBase>(shared_from_this());
 
     auto reading = std::make_unique<ReadFromURL>(
         read_from_format_info.source_header,
diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp
index c052e781877..a0b5fcd6f28 100644
--- a/src/Storages/StorageURLCluster.cpp
+++ b/src/Storages/StorageURLCluster.cpp
@@ -81,9 +81,9 @@ void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String
     TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context);
 }
 
-RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const
+RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
 {
-    auto iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, context->getSettingsRef().glob_expansion_max_elements, query, virtual_columns, context);
+    auto iterator = std::make_shared<StorageURLSource::DisclosedGlobIterator>(uri, context->getSettingsRef().glob_expansion_max_elements, predicate, virtual_columns, context);
     auto callback = std::make_shared<TaskIterator>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });
     return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
 }
diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h
index ddf7e6f0790..07978040029 100644
--- a/src/Storages/StorageURLCluster.h
+++ b/src/Storages/StorageURLCluster.h
@@ -34,7 +34,7 @@ public:
 
     NamesAndTypesList getVirtuals() const override { return virtual_columns; }
 
-    RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override;
+    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
 
     bool supportsSubcolumns() const override { return true; }
 

From c808b03e55882beaff7e9e58208546af9cd34760 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:27:33 +0000
Subject: [PATCH 051/105] Remove unneeded code

---
 src/Storages/HDFS/StorageHDFS.cpp | 33 -------------
 src/Storages/HDFS/StorageHDFS.h   |  1 -
 src/Storages/StorageAzureBlob.cpp | 64 +-----------------------
 src/Storages/StorageAzureBlob.h   | 13 -----
 src/Storages/StorageFile.cpp      | 18 -------
 src/Storages/StorageFile.h        |  8 ---
 src/Storages/StorageS3.cpp        | 82 +------------------------------
 src/Storages/StorageS3.h          | 10 ----
 src/Storages/StorageURL.cpp       | 22 ---------
 src/Storages/StorageURL.h         |  1 -
 10 files changed, 2 insertions(+), 250 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index c7cbaa1e561..430ecc7a585 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -440,34 +440,6 @@ public:
         uris_iter = uris.begin();
     }
 
-    Impl(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    {
-        const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
-        uris = getPathsList(path_from_uri, uri_without_path, context);
-        ASTPtr filter_ast;
-        if (!uris.empty())
-             filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, uris[0].path, context);
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(uris.size());
-            for (const auto & path_with_info : uris)
-                paths.push_back(path_with_info.path);
-
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast);
-        }
-        auto file_progress_callback = context->getFileProgressCallback();
-
-        for (auto & elem : uris)
-        {
-            elem.path = uri_without_path + elem.path;
-            if (file_progress_callback && elem.info)
-                file_progress_callback(FileProgress(0, elem.info->size));
-        }
-        uris_iter = uris.begin();
-    }
-
     StorageHDFS::PathWithInfo next()
     {
         std::lock_guard lock(mutex);
@@ -549,9 +521,6 @@ private:
     std::function<void(FileProgress)> file_progress_callback;
 };
 
-HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, query, virtual_columns, context)) {}
-
 HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(uri, predicate, virtual_columns, context)) {}
 
@@ -577,7 +546,6 @@ HDFSSource::HDFSSource(
     UInt64 max_block_size_,
     std::shared_ptr<IteratorWrapper> file_iterator_,
     bool need_only_count_)
-    //const SelectQueryInfo & query_info_)
     : ISource(info.source_header, false)
     , WithContext(context_)
     , storage(std::move(storage_))
@@ -588,7 +556,6 @@ HDFSSource::HDFSSource(
     , file_iterator(file_iterator_)
     , columns_description(info.columns_description)
     , need_only_count(need_only_count_)
-    //, query_info(query_info_)
 {
     initialize();
 }
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index cee1b674eb7..9d9a857bf4e 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -115,7 +115,6 @@ public:
     class DisclosedGlobIterator
     {
         public:
-            DisclosedGlobIterator(const String & uri_, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
             StorageHDFS::PathWithInfo next();
         private:
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index defff830411..294a65c067f 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -911,53 +911,6 @@ bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const
     return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context);
 }
 
-StorageAzureBlobSource::GlobIterator::GlobIterator(
-    AzureObjectStorage * object_storage_,
-    const std::string & container_,
-    String blob_path_with_globs_,
-    ASTPtr query_,
-    const NamesAndTypesList & virtual_columns_,
-    ContextPtr context_,
-    RelativePathsWithMetadata * outer_blobs_,
-    std::function<void(FileProgress)> file_progress_callback_)
-    : IIterator(context_)
-    , object_storage(object_storage_)
-    , container(container_)
-    , blob_path_with_globs(blob_path_with_globs_)
-    , query(query_)
-    , virtual_columns(virtual_columns_)
-    , outer_blobs(outer_blobs_)
-    , file_progress_callback(file_progress_callback_)
-{
-
-    const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{"));
-
-    /// We don't have to list bucket, because there is no asterisks.
-    if (key_prefix.size() == blob_path_with_globs.size())
-    {
-        auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs);
-        blobs_with_metadata.emplace_back(
-            blob_path_with_globs,
-            object_metadata);
-        if (outer_blobs)
-            outer_blobs->emplace_back(blobs_with_metadata.back());
-        if (file_progress_callback)
-            file_progress_callback(FileProgress(0, object_metadata.size_bytes));
-        is_finished = true;
-        return;
-    }
-
-    object_storage_iterator = object_storage->iterate(key_prefix);
-
-    matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(blob_path_with_globs));
-
-    if (!matcher->ok())
-        throw Exception(
-            ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error());
-
-    recursive = blob_path_with_globs == "/**" ? true : false;
-}
-
 StorageAzureBlobSource::GlobIterator::GlobIterator(
     AzureObjectStorage * object_storage_,
     const std::string & container_,
@@ -1004,7 +957,6 @@ StorageAzureBlobSource::GlobIterator::GlobIterator(
     recursive = blob_path_with_globs == "/**" ? true : false;
 
     filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
-    is_initialized = true;
 }
 
 RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
@@ -1044,22 +996,8 @@ RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next()
         }
 
         index = 0;
-        if (!is_initialized)
-        {
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(container) / new_batch.front().relative_path, getContext());
-            is_initialized = true;
-        }
 
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(new_batch.size());
-            for (auto & path_with_metadata : new_batch)
-                paths.push_back(fs::path(container) / path_with_metadata.relative_path);
-
-            VirtualColumnUtils::filterByPathOrFile(new_batch, paths, query, virtual_columns, getContext(), filter_ast);
-        }
-        else if (filter_dag)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(new_batch.size());
diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h
index 30b91b7f85a..cc908fa3215 100644
--- a/src/Storages/StorageAzureBlob.h
+++ b/src/Storages/StorageAzureBlob.h
@@ -155,16 +155,6 @@ public:
     class GlobIterator : public IIterator
     {
     public:
-        GlobIterator(
-            AzureObjectStorage * object_storage_,
-            const std::string & container_,
-            String blob_path_with_globs_,
-            ASTPtr query_,
-            const NamesAndTypesList & virtual_columns_,
-            ContextPtr context_,
-            RelativePathsWithMetadata * outer_blobs_,
-            std::function<void(FileProgress)> file_progress_callback_ = {});
-
         GlobIterator(
             AzureObjectStorage * object_storage_,
             const std::string & container_,
@@ -182,8 +172,6 @@ public:
         AzureObjectStorage * object_storage;
         std::string container;
         String blob_path_with_globs;
-        ASTPtr query;
-        ASTPtr filter_ast;
         ActionsDAGPtr filter_dag;
         NamesAndTypesList virtual_columns;
 
@@ -198,7 +186,6 @@ public:
 
         void createFilterAST(const String & any_key);
         bool is_finished = false;
-        bool is_initialized = false;
         std::mutex next_mutex;
 
         std::function<void(FileProgress)> file_progress_callback;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 18acbfc7153..47d29c3c501 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -932,24 +932,6 @@ static std::chrono::seconds getLockTimeout(ContextPtr context)
 
 using StorageFilePtr = std::shared_ptr<StorageFile>;
 
-
-StorageFileSource::FilesIterator::FilesIterator(
-    const Strings & files_,
-    std::optional<StorageFile::ArchiveInfo> archive_info_,
-    ASTPtr query,
-    const NamesAndTypesList & virtual_columns,
-    ContextPtr context_,
-    bool distributed_processing_)
-    : files(files_), archive_info(std::move(archive_info_)), distributed_processing(distributed_processing_), context(context_)
-{
-    ASTPtr filter_ast;
-    if (!distributed_processing && !archive_info && !files.empty() && !files[0].empty())
-        filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, files[0], context_);
-
-    if (filter_ast)
-        VirtualColumnUtils::filterByPathOrFile(files, files, query, virtual_columns, context_, filter_ast);
-}
-
 StorageFileSource::FilesIterator::FilesIterator(
     const Strings & files_,
     std::optional<StorageFile::ArchiveInfo> archive_info_,
diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index ecb9e01b862..4f8cbfd4795 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -193,14 +193,6 @@ public:
     class FilesIterator
     {
     public:
-        explicit FilesIterator(
-            const Strings & files_,
-            std::optional<StorageFile::ArchiveInfo> archive_info_,
-            ASTPtr query,
-            const NamesAndTypesList & virtual_columns,
-            ContextPtr context_,
-            bool distributed_processing_ = false);
-
         explicit FilesIterator(
             const Strings & files_,
             std::optional<StorageFile::ArchiveInfo> archive_info_,
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 375a367bfab..88ea57e21cc 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -260,55 +260,6 @@ using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
 class StorageS3Source::DisclosedGlobIterator::Impl : WithContext
 {
 public:
-    Impl(
-        const S3::Client & client_,
-        const S3::URI & globbed_uri_,
-        ASTPtr & query_,
-        const NamesAndTypesList & virtual_columns_,
-        ContextPtr context_,
-        KeysWithInfo * read_keys_,
-        const S3Settings::RequestSettings & request_settings_,
-        std::function<void(FileProgress)> file_progress_callback_)
-        : WithContext(context_)
-        , client(client_.clone())
-        , globbed_uri(globbed_uri_)
-        , query(query_)
-        , virtual_columns(virtual_columns_)
-        , read_keys(read_keys_)
-        , request_settings(request_settings_)
-        , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1)
-        , list_objects_scheduler(threadPoolCallbackRunner<ListObjectsOutcome>(list_objects_pool, "ListObjects"))
-        , file_progress_callback(file_progress_callback_)
-    {
-        if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
-            throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name");
-
-        const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
-
-        /// We don't have to list bucket, because there is no asterisks.
-        if (key_prefix.size() == globbed_uri.key.size())
-        {
-            buffer.emplace_back(std::make_shared<KeyWithInfo>(globbed_uri.key, std::nullopt));
-            buffer_iter = buffer.begin();
-            is_finished = true;
-            return;
-        }
-
-        request.SetBucket(globbed_uri.bucket);
-        request.SetPrefix(key_prefix);
-        request.SetMaxKeys(static_cast<int>(request_settings.list_object_keys_size));
-
-        outcome_future = listObjectsAsync();
-
-        matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(globbed_uri.key));
-        if (!matcher->ok())
-            throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
-                "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
-
-        recursive = globbed_uri.key == "/**" ? true : false;
-        fillInternalBufferAssumeLocked();
-    }
-
     Impl(
         const S3::Client & client_,
         const S3::URI & globbed_uri_,
@@ -357,7 +308,6 @@ public:
         fillInternalBufferAssumeLocked();
 
         filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
-        is_initialized = true;
     }
 
     KeyWithInfoPtr next()
@@ -475,22 +425,7 @@ private:
             return;
         }
 
-        if (!is_initialized)
-        {
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, fs::path(globbed_uri.bucket) / temp_buffer.front()->key, getContext());
-            is_initialized = true;
-        }
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(temp_buffer.size());
-            for (const auto & key_with_info : temp_buffer)
-                paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key);
-
-            VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, query, virtual_columns, getContext(), filter_ast);
-        }
-        else if (filter_dag)
+        if (filter_dag)
         {
             std::vector<String> paths;
             paths.reserve(temp_buffer.size());
@@ -539,8 +474,6 @@ private:
     S3::URI globbed_uri;
     ASTPtr query;
     NamesAndTypesList virtual_columns;
-    bool is_initialized{false};
-    ASTPtr filter_ast;
     ActionsDAGPtr filter_dag;
     std::unique_ptr<re2::RE2> matcher;
     bool recursive{false};
@@ -556,19 +489,6 @@ private:
     std::function<void(FileProgress)> file_progress_callback;
 };
 
-StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
-    const S3::Client & client_,
-    const S3::URI & globbed_uri_,
-    ASTPtr query,
-    const NamesAndTypesList & virtual_columns_,
-    ContextPtr context,
-    KeysWithInfo * read_keys_,
-    const S3Settings::RequestSettings & request_settings_,
-    std::function<void(FileProgress)> file_progress_callback_)
-    : pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_, query, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_))
-{
-}
-
 StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
     const S3::Client & client_,
     const S3::URI & globbed_uri_,
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index dd7e0edb2d9..f63bf3a8e90 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -75,16 +75,6 @@ public:
     class DisclosedGlobIterator : public IIterator
     {
     public:
-        DisclosedGlobIterator(
-            const S3::Client & client_,
-            const S3::URI & globbed_uri_,
-            ASTPtr query,
-            const NamesAndTypesList & virtual_columns,
-            ContextPtr context,
-            KeysWithInfo * read_keys_ = nullptr,
-            const S3Settings::RequestSettings & request_settings_ = {},
-            std::function<void(FileProgress)> progress_callback_ = {});
-
         DisclosedGlobIterator(
             const S3::Client & client_,
             const S3::URI & globbed_uri_,
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index ac17f880738..6ed535a0317 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -184,25 +184,6 @@ namespace
 class StorageURLSource::DisclosedGlobIterator::Impl
 {
 public:
-    Impl(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    {
-        uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
-
-        ASTPtr filter_ast;
-        if (!uris.empty())
-            filter_ast = VirtualColumnUtils::createPathAndFileFilterAst(query, virtual_columns, Poco::URI(uris[0]).getPath(), context);
-
-        if (filter_ast)
-        {
-            std::vector<String> paths;
-            paths.reserve(uris.size());
-            for (const auto & uri : uris)
-                paths.push_back(Poco::URI(uri).getPath());
-
-            VirtualColumnUtils::filterByPathOrFile(uris, paths, query, virtual_columns, context, filter_ast);
-        }
-    }
-
     Impl(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         uris = parseRemoteDescription(uri_, 0, uri_.size(), ',', max_addresses);
@@ -241,9 +222,6 @@ private:
     std::atomic_size_t index = 0;
 };
 
-StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
-    : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, query, virtual_columns, context)) {}
-
 StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     : pimpl(std::make_shared<StorageURLSource::DisclosedGlobIterator::Impl>(uri, max_addresses, predicate, virtual_columns, context)) {}
 
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h
index 1b2fb97cb28..c9e6f6311bf 100644
--- a/src/Storages/StorageURL.h
+++ b/src/Storages/StorageURL.h
@@ -139,7 +139,6 @@ public:
     class DisclosedGlobIterator
     {
     public:
-        DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
         DisclosedGlobIterator(const String & uri_, size_t max_addresses, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
 
         String next();

From 1b20ce51624f996fc3995c5c511ecce2e6de872a Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:50:06 +0000
Subject: [PATCH 052/105] Cleanup

---
 src/Storages/HDFS/StorageHDFS.cpp       |  2 +-
 src/Storages/IStorageCluster.cpp        | 16 ++++----
 src/Storages/S3Queue/StorageS3Queue.cpp | 12 +++---
 src/Storages/StorageAzureBlob.cpp       |  7 ++--
 src/Storages/StorageDistributed.cpp     |  2 +-
 src/Storages/StorageFile.cpp            | 50 +++++++------------------
 src/Storages/StorageS3.cpp              |  6 +--
 src/Storages/StorageURL.cpp             |  4 +-
 8 files changed, 38 insertions(+), 61 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 430ecc7a585..c1c0f7d76bd 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -1,4 +1,3 @@
-#include "Processors/Sources/NullSource.h"
 #include "config.h"
 
 #if USE_HDFS
@@ -16,6 +15,7 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
 #include <Processors/QueryPlan/QueryPlan.h>
 #include <Processors/QueryPlan/SourceStepWithFilter.h>
 
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index c59b74255b2..6f42d8f855c 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -1,10 +1,7 @@
-#include "Storages/IStorageCluster.h"
+#include <Storages/IStorageCluster.h>
 
-#include "Common/Exception.h"
-#include "Core/QueryProcessingStage.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-#include "QueryPipeline/QueryPipelineBuilder.h"
+#include <Common/Exception.h>
+#include <Core/QueryProcessingStage.h>
 #include <DataTypes/DataTypeString.h>
 #include <IO/ConnectionTimeouts.h>
 #include <Interpreters/Context.h>
@@ -14,11 +11,14 @@
 #include <Interpreters/AddDefaultDatabaseVisitor.h>
 #include <Interpreters/TranslateQualifiedNamesVisitor.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>
+#include <Parsers/queryToString.h>
+#include <Processors/Sources/NullSource.h>
+#include <Processors/Sources/RemoteSource.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 #include <QueryPipeline/narrowPipe.h>
 #include <QueryPipeline/Pipe.h>
-#include <Processors/Sources/RemoteSource.h>
 #include <QueryPipeline/RemoteQueryExecutor.h>
-#include <Parsers/queryToString.h>
+#include <QueryPipeline/QueryPipelineBuilder.h>
 #include <Storages/IStorage.h>
 #include <Storages/SelectQueryInfo.h>
 #include <Storages/StorageDictionary.h>
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 6d078e1aa1b..bc33e8cf2a9 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -1,7 +1,3 @@
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-#include "QueryPipeline/QueryPipelineBuilder.h"
 #include "config.h"
 
 #if USE_AWS_S3
@@ -10,11 +6,14 @@
 #include <IO/CompressionMethod.h>
 #include <Formats/FormatFactory.h>
 #include <Interpreters/InterpreterInsertQuery.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTInsertQuery.h>
 #include <Processors/Executors/CompletedPipelineExecutor.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
 #include <Processors/ISource.h>
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTInsertQuery.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
+#include <Processors/Sources/NullSource.h>
 #include <Storages/S3Queue/S3QueueTableMetadata.h>
 #include <Storages/S3Queue/StorageS3Queue.h>
 #include <Storages/S3Queue/S3QueueFilesMetadata.h>
@@ -24,6 +23,7 @@
 #include <Storages/StorageSnapshot.h>
 #include <Storages/VirtualColumnUtils.h>
 #include <Storages/prepareReadingFromFormat.h>
+#include <QueryPipeline/QueryPipelineBuilder.h>
 #include <filesystem>
 
 
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index 294a65c067f..4f63b144f43 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -1,8 +1,4 @@
 #include <Storages/StorageAzureBlob.h>
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
-#include "Processors/Sources/NullSource.h"
-
 
 #if USE_AZURE_BLOB_STORAGE
 #include <Formats/FormatFactory.h>
@@ -24,6 +20,9 @@
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Formats/IOutputFormat.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageSnapshot.h>
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index c914388e55e..7ef2ff08827 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -30,7 +30,6 @@
 #include <Common/randomSeed.h>
 #include <Common/formatReadable.h>
 #include <Common/CurrentMetrics.h>
-#include "Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h"
 
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
@@ -92,6 +91,7 @@
 #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
 #include <Processors/QueryPlan/ReadFromPreparedSource.h>
 #include <Processors/QueryPlan/ExpressionStep.h>
+#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
 #include <Processors/Sources/NullSource.h>
 #include <Processors/Sources/RemoteSource.h>
 #include <Processors/Sinks/EmptySink.h>
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 47d29c3c501..60e06291200 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -38,6 +38,8 @@
 #include <Processors/Sources/NullSource.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
 #include <Processors/Executors/PullingPipelineExecutor.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Common/escapeForFileName.h>
 #include <Common/typeid_cast.h>
@@ -45,8 +47,6 @@
 #include <Common/filesystemHelpers.h>
 #include <Common/logger_useful.h>
 #include <Common/ProfileEvents.h>
-#include <Processors/QueryPlan/QueryPlan.h>
-#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <QueryPipeline/Pipe.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>
@@ -1330,25 +1330,15 @@ public:
     ReadFromFile(
         Block sample_block,
         std::shared_ptr<StorageFile> storage_,
-        std::vector<std::string> paths_,
-        std::optional<StorageFile::ArchiveInfo> archive_info_,
-        NamesAndTypesList virtual_columns_,
-        bool distributed_processing_,
         ReadFromFormatInfo info_,
         const bool need_only_count_,
-        size_t total_bytes_to_read_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , storage(std::move(storage_))
-        , paths(std::move(paths_))
-        , archive_info(std::move(archive_info_))
-        , virtual_columns(std::move(virtual_columns_))
-        , distributed_processing(distributed_processing_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
-        , total_bytes_to_read(total_bytes_to_read_)
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , max_num_streams(num_streams_)
@@ -1357,28 +1347,15 @@ public:
 
 private:
     std::shared_ptr<StorageFile> storage;
-
-    std::vector<std::string> paths;
-    std::optional<StorageFile::ArchiveInfo> archive_info;
-
-    NamesAndTypesList virtual_columns;
-    const bool distributed_processing;
-
     ReadFromFormatInfo info;
     const bool need_only_count;
 
-    size_t total_bytes_to_read;
-
     ContextPtr context;
-
     size_t max_block_size;
     const size_t max_num_streams;
 
     std::shared_ptr<StorageFileSource::FilesIterator> files_iterator;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 
@@ -1435,13 +1412,8 @@ void StorageFile::read(
     auto reading = std::make_unique<ReadFromFile>(
         read_from_format_info.source_header,
         std::move(this_ptr),
-        paths,
-        archive_info,
-        virtual_columns,
-        distributed_processing,
         std::move(read_from_format_info),
         need_only_count,
-        total_bytes_to_read,
         context,
         max_block_size,
         num_streams);
@@ -1454,7 +1426,13 @@ void ReadFromFile::createIterator(const ActionsDAG::Node * predicate)
     if (files_iterator)
         return;
 
-    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(paths, archive_info, predicate, virtual_columns, context, distributed_processing);
+    files_iterator = std::make_shared<StorageFileSource::FilesIterator>(
+        storage->paths,
+        storage->archive_info,
+        predicate,
+        storage->virtual_columns,
+        context,
+        storage->distributed_processing);
 }
 
 void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
@@ -1464,10 +1442,10 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     size_t num_streams = max_num_streams;
 
     size_t files_to_read = 0;
-    if (archive_info)
-        files_to_read = archive_info->paths_to_archives.size();
+    if (storage->archive_info)
+        files_to_read = storage->archive_info->paths_to_archives.size();
     else
-        files_to_read = paths.size();
+        files_to_read = storage->paths.size();
 
     if (max_num_streams > files_to_read)
         num_streams = files_to_read;
@@ -1478,8 +1456,8 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
     /// Set total number of bytes to process. For progress bar.
     auto progress_callback = context->getFileProgressCallback();
 
-    if (progress_callback && !archive_info)
-        progress_callback(FileProgress(0, total_bytes_to_read));
+    if (progress_callback && !storage->archive_info)
+        progress_callback(FileProgress(0, storage->total_bytes_to_read));
 
     for (size_t i = 0; i < num_streams; ++i)
     {
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 88ea57e21cc..3e1af2df4b0 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1,7 +1,4 @@
-#include "Processors/Sources/NullSource.h"
 #include "config.h"
-#include <Common/ProfileEvents.h>
-#include "Parsers/ASTCreateQuery.h"
 
 #if USE_AWS_S3
 
@@ -17,6 +14,7 @@
 
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTInsertQuery.h>
+#include <Parsers/ASTCreateQuery.h>
 
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageS3.h>
@@ -43,6 +41,7 @@
 #include <Processors/Formats/IOutputFormat.h>
 #include <Processors/Formats/IInputFormat.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/Sources/NullSource.h>
 #include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 
@@ -58,6 +57,7 @@
 #include <Common/parseGlobs.h>
 #include <Common/quoteString.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/ProfileEvents.h>
 
 #include <Processors/ISource.h>
 #include <Processors/Sinks/SinkToStorage.h>
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 6ed535a0317..3389ed1db86 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -26,6 +26,8 @@
 #include <Processors/Transforms/AddingDefaultsTransform.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <Processors/Sources/ConstChunkGenerator.h>
+#include <Processors/QueryPlan/QueryPlan.h>
+#include <Processors/QueryPlan/SourceStepWithFilter.h>
 
 #include <Common/ThreadStatus.h>
 #include <Common/parseRemoteDescription.h>
@@ -34,8 +36,6 @@
 #include <Common/ProfileEvents.h>
 #include <Common/thread_local_rng.h>
 #include <Common/logger_useful.h>
-#include "Processors/QueryPlan/QueryPlan.h"
-#include "Processors/QueryPlan/SourceStepWithFilter.h"
 #include <IO/ReadWriteBufferFromHTTP.h>
 #include <IO/HTTPHeaderEntries.h>
 

From 4f99a8bc1f7f8a3d5e3ad9188ae649caefec8ed5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 17:54:20 +0000
Subject: [PATCH 053/105] Remove more unused code.

---
 src/Storages/VirtualColumnUtils.cpp | 38 -----------------------------
 src/Storages/VirtualColumnUtils.h   | 19 ---------------
 2 files changed, 57 deletions(-)

diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index b63b4e7cca7..e54528bbf01 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -424,44 +424,6 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
     return block.getByName("_idx").column;
 }
 
-ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context)
-{
-    if (!query || virtual_columns.empty())
-        return {};
-
-    Block block;
-    for (const auto & column : virtual_columns)
-    {
-        if (column.name == "_file" || column.name == "_path")
-            block.insert({column.type->createColumn(), column.type, column.name});
-    }
-    /// Create a block with one row to construct filter
-    /// Append "idx" column as the filter result
-    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-    addPathAndFileToVirtualColumns(block, path_example, 0);
-    ASTPtr filter_ast;
-    prepareFilterBlockWithQuery(query, context, block, filter_ast);
-    return filter_ast;
-}
-
-ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast)
-{
-    Block block;
-    for (const auto & column : virtual_columns)
-    {
-        if (column.name == "_file" || column.name == "_path")
-            block.insert({column.type->createColumn(), column.type, column.name});
-    }
-    block.insert({ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "_idx"});
-
-    for (size_t i = 0; i != paths.size(); ++i)
-        addPathAndFileToVirtualColumns(block, paths[i], i);
-
-    filterBlockWithQuery(query, block, context, filter_ast);
-
-    return block.getByName("_idx").column;
-}
-
 void addRequestedPathFileAndSizeVirtualsToChunk(
     Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional<size_t> size, const String * filename)
 {
diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h
index 6e1af0995cc..3c07e33a177 100644
--- a/src/Storages/VirtualColumnUtils.h
+++ b/src/Storages/VirtualColumnUtils.h
@@ -58,25 +58,6 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
 
 NamesAndTypesList getPathFileAndSizeVirtualsForStorage(NamesAndTypesList storage_columns);
 
-ASTPtr createPathAndFileFilterAst(const ASTPtr & query, const NamesAndTypesList & virtual_columns, const String & path_example, const ContextPtr & context);
-
-ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast);
-
-template <typename T>
-void filterByPathOrFile(std::vector<T> & sources, const std::vector<String> & paths, const ASTPtr & query, const NamesAndTypesList & virtual_columns, const ContextPtr & context, ASTPtr filter_ast)
-{
-    auto indexes_column = getFilterByPathAndFileIndexes(paths, query, virtual_columns, context, filter_ast);
-    const auto & indexes = typeid_cast<const ColumnUInt64 &>(*indexes_column).getData();
-    if (indexes.size() == sources.size())
-        return;
-
-    std::vector<T> filtered_sources;
-    filtered_sources.reserve(indexes.size());
-    for (auto index : indexes)
-        filtered_sources.emplace_back(std::move(sources[index]));
-    sources = std::move(filtered_sources);
-}
-
 ActionsDAGPtr createPathAndFileFilterDAG(const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns);
 
 ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const ActionsDAGPtr & dag, const NamesAndTypesList & virtual_columns, const ContextPtr & context);

From 9c25cb6692cfdcf410c9d735a77e9c2eb01fff78 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Tue, 2 Jan 2024 18:08:04 +0000
Subject: [PATCH 054/105] Cleanup

---
 src/Storages/HDFS/StorageHDFS.cpp | 25 ++++---------------------
 src/Storages/HDFS/StorageHDFS.h   |  3 +--
 src/Storages/StorageURL.cpp       | 16 ----------------
 3 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index c1c0f7d76bd..974b2bb68cf 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -411,7 +411,6 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
 class HDFSSource::DisclosedGlobIterator::Impl
 {
 public:
-
     Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context)
     {
         const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
@@ -854,10 +853,6 @@ public:
 
     ReadFromHDFS(
         Block sample_block,
-        std::vector<String> uris_,
-        bool distributed_processing_,
-        NamesAndTypesList virtual_columns_,
-        bool is_path_with_globs_,
         ReadFromFormatInfo info_,
         bool need_only_count_,
         std::shared_ptr<StorageHDFS> storage_,
@@ -865,10 +860,6 @@ public:
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
-        , uris(std::move(uris_))
-        , distributed_processing(distributed_processing_)
-        , virtual_columns(std::move(virtual_columns_))
-        , is_path_with_globs(is_path_with_globs_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , storage(std::move(storage_))
@@ -879,10 +870,6 @@ public:
     }
 
 private:
-    std::vector<String> uris;
-    const bool distributed_processing;
-    NamesAndTypesList virtual_columns;
-    bool is_path_with_globs;
     ReadFromFormatInfo info;
     const bool need_only_count;
     std::shared_ptr<StorageHDFS> storage;
@@ -924,10 +911,6 @@ void StorageHDFS::read(
 
     auto reading = std::make_unique<ReadFromHDFS>(
         read_from_format_info.source_header,
-        uris,
-        distributed_processing,
-        virtual_columns,
-        is_path_with_globs,
         std::move(read_from_format_info),
         need_only_count,
         std::move(this_ptr),
@@ -943,17 +926,17 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
     if (iterator_wrapper)
         return;
 
-    if (distributed_processing)
+    if (storage->distributed_processing)
     {
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>(
             [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
                 return StorageHDFS::PathWithInfo{callback(), std::nullopt};
         });
     }
-    else if (is_path_with_globs)
+    else if (storage->is_path_with_globs)
     {
         /// Iterate through disclosed globs and make a source for each file
-        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uris[0], predicate, virtual_columns, context);
+        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
         {
             return glob_iterator->next();
@@ -961,7 +944,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
     }
     else
     {
-        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(uris, predicate, virtual_columns, context);
+        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->virtual_columns, context);
         iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
         {
             return uris_iterator->next();
diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 9d9a857bf4e..f1f0019d3e0 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -94,6 +94,7 @@ public:
 
 protected:
     friend class HDFSSource;
+    friend class ReadFromHDFS;
 
 private:
     std::vector<String> uris;
@@ -144,7 +145,6 @@ public:
         UInt64 max_block_size_,
         std::shared_ptr<IteratorWrapper> file_iterator_,
         bool need_only_count_);
-        //const SelectQueryInfo & query_info_);
 
     String getName() const override;
 
@@ -163,7 +163,6 @@ private:
     ColumnsDescription columns_description;
     bool need_only_count;
     size_t total_rows_in_file = 0;
-    //SelectQueryInfo query_info;
 
     std::unique_ptr<ReadBuffer> read_buf;
     std::shared_ptr<IInputFormat> input_format;
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 3389ed1db86..36219d13a45 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -896,15 +896,10 @@ public:
         : SourceStepWithFilter(DataStream{.header = std::move(sample_block)})
         , storage(std::move(storage_))
         , uri_options(uri_options_)
-        // , paths(std::move(paths_))
-        // , archive_info(std::move(archive_info_))
-        // , virtual_columns(std::move(virtual_columns_))
-        // , distributed_processing(distributed_processing_)
         , info(std::move(info_))
         , need_only_count(need_only_count_)
         , read_uri_params(std::move(read_uri_params_))
         , read_post_data_callback(std::move(read_post_data_callback_))
-        // , total_bytes_to_read(total_bytes_to_read_)
         , context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -915,19 +910,11 @@ private:
     std::shared_ptr<IStorageURLBase> storage;
     std::vector<String> * uri_options;
 
-    // std::vector<std::string> paths;
-    // std::optional<StorageFile::ArchiveInfo> archive_info;
-
-    // NamesAndTypesList virtual_columns;
-    // const bool distributed_processing;
-
     ReadFromFormatInfo info;
     const bool need_only_count;
     std::vector<std::pair<std::string, std::string>> read_uri_params;
     std::function<void(std::ostream &)> read_post_data_callback;
 
-    // size_t total_bytes_to_read;
-
     ContextPtr context;
 
     size_t max_block_size;
@@ -937,9 +924,6 @@ private:
     bool is_url_with_globs = false;
     bool is_empty_glob = false;
 
-    // FieldVectorPtr keys;
-    // bool all_scan = false;
-
     void createIterator(const ActionsDAG::Node * predicate);
 };
 

From 2da0a306269f94ce208c1ac20c3f98e7da345e89 Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy <bharatnc@gmail.com>
Date: Mon, 1 Jan 2024 21:32:00 -0800
Subject: [PATCH 055/105] add a test

---
 .../test_wrong_db_or_table_name/test.py       | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/integration/test_wrong_db_or_table_name/test.py b/tests/integration/test_wrong_db_or_table_name/test.py
index 641501eac84..a5096d80ca9 100644
--- a/tests/integration/test_wrong_db_or_table_name/test.py
+++ b/tests/integration/test_wrong_db_or_table_name/test.py
@@ -57,6 +57,31 @@ def test_drop_wrong_database_name(start):
     node.query("DROP DATABASE test;")
 
 
+def test_database_engine_name(start):
+    # test with a valid database engine
+    node.query(
+        """
+        CREATE DATABASE test_atomic ENGINE = Atomic;
+        CREATE TABLE test_atomic.table_test_atomic (i Int64) ENGINE = MergeTree() ORDER BY i;
+        INSERT INTO test_atomic.table_test_atomic SELECT 1;
+        """
+    )
+    assert 1 == int(node.query("SELECT * FROM test_atomic.table_test_atomic".strip()))
+    # test with an invalid database engine
+    with pytest.raises(
+        QueryRuntimeException,
+        match="DB::Exception: Unknown database engine Atomic123. Maybe you meant: \\['Atomic'\\].",
+    ):
+        node.query("CREATE DATABASE test_atomic123 ENGINE = Atomic123;")
+
+    node.query(
+        """
+        DROP TABLE test_atomic.table_test_atomic;
+        DROP DATABASE test_atomic;
+       """
+    )
+
+
 def test_wrong_table_name(start):
     node.query(
         """

From 30876b159afb21fc6a251c65b92d9ae793b59594 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Tue, 2 Jan 2024 20:56:19 +0100
Subject: [PATCH 056/105] fix

---
 .../0_stateless/01175_distributed_ddl_output_mode_long.sh    | 2 +-
 .../0_stateless/02447_drop_database_replica.reference        | 4 ----
 tests/queries/0_stateless/02447_drop_database_replica.sh     | 5 ++---
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
index d2695e602c5..12e142adda9 100755
--- a/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
+++ b/tests/queries/0_stateless/01175_distributed_ddl_output_mode_long.sh
@@ -33,7 +33,7 @@ function run_until_out_contains()
     done
 }
 
-RAND_COMMENT="01175_DDL_$RANDOM"
+RAND_COMMENT="01175_DDL_$CLICKHOUSE_DATABASE"
 LOG_COMMENT="${CLICKHOUSE_LOG_COMMENT}_$RAND_COMMENT"
 
 CLICKHOUSE_CLIENT_WITH_SETTINGS=${CLICKHOUSE_CLIENT/--log_comment ${CLICKHOUSE_LOG_COMMENT}/--log_comment ${LOG_COMMENT}}
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference
index 7be5dde1998..1af3ee244f1 100644
--- a/tests/queries/0_stateless/02447_drop_database_replica.reference
+++ b/tests/queries/0_stateless/02447_drop_database_replica.reference
@@ -12,9 +12,6 @@ t
 2
 rdb_default	1	1	s1	r1	1
 2
-s1	r1	OK	2	0
-s1	r2	QUEUED	2	0
-s2	r1	QUEUED	2	0
 2
 s1	r1	OK	2	0
 s1	r2	QUEUED	2	0
@@ -27,5 +24,4 @@ rdb_default	1	2	s1	r2	0
 t
 t2
 t3
-t4
 rdb_default_4	1	1	s1	r1	1
diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh
index d12f173f388..fb89db5045b 100755
--- a/tests/queries/0_stateless/02447_drop_database_replica.sh
+++ b/tests/queries/0_stateless/02447_drop_database_replica.sh
@@ -33,9 +33,8 @@ $CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_na
 $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it"
 
 # Also check that it doesn't exceed distributed_ddl_task_timeout waiting for inactive replicas
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>/dev/null | sort
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t3 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
-timeout 10s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t4 (n int) engine=Log" | sort
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=throw_only_active -q "create table $db.t2 (n int) engine=Log" 2>&1| grep -Fac "TIMEOUT_EXCEEDED"
+timeout 60s $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=1000 --distributed_ddl_output_mode=null_status_on_timeout_only_active -q "create table $db.t3 (n int) engine=Log" | sort
 
 $CLICKHOUSE_CLIENT -q "detach database $db3"
 $CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db"

From eeed23b1bc2b789c0d3097595540b16ef7e788b0 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 09:45:25 +0000
Subject: [PATCH 057/105] Fix sanitizer assert.

---
 src/Storages/StorageURL.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 36219d13a45..9ace7775d4b 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -1035,12 +1035,12 @@ void ReadFromURL::createIterator(const ActionsDAG::Node * predicate)
     }
     else
     {
-        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([&, max_addresses, done = false]() mutable
+        iterator_wrapper = std::make_shared<StorageURLSource::IteratorWrapper>([max_addresses, done = false, &uri = storage->uri]() mutable
         {
             if (done)
                 return StorageURLSource::FailoverOptions{};
             done = true;
-            return getFailoverOptions(storage->uri, max_addresses);
+            return getFailoverOptions(uri, max_addresses);
         });
         num_streams = 1;
     }

From 91fc3b3456590ffc9577b080357513e651303b1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 10:54:42 +0000
Subject: [PATCH 058/105] Linter

---
 src/Common/findExtreme.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp
index e1f1e199d56..032ac75b79b 100644
--- a/src/Common/findExtreme.cpp
+++ b/src/Common/findExtreme.cpp
@@ -20,7 +20,7 @@ struct MaxComparator
 MULTITARGET_FUNCTION_AVX2_SSE42(
     MULTITARGET_FUNCTION_HEADER(template <is_any_native_number T, typename ComparatorClass, bool add_all_elements, bool add_if_cond_zero> static std::optional<T> NO_INLINE),
     findExtremeImpl,
-    MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end)
+    MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end) /// NOLINT
     {
         size_t count = row_end - row_begin;
         ptr += row_begin;

From cb4d571a453e1427ffdbb40d1cfe9bfb975aa611 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 11:14:19 +0000
Subject: [PATCH 059/105] Support ALIAS columns in USING clause

---
 src/Planner/CollectTableExpressionData.cpp    | 55 +++++++++++++++-
 src/Planner/PlannerJoinTree.cpp               | 63 +++++++++++++++++++
 src/Planner/TableExpressionData.h             |  4 +-
 ...5_analyzer_using_functional_args.reference |  1 +
 .../02955_analyzer_using_functional_args.sql  |  1 +
 5 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 5ba318dab6a..492120141a4 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -8,6 +8,8 @@
 #include <Analyzer/QueryNode.h>
 #include <Analyzer/TableNode.h>
 #include <Analyzer/TableFunctionNode.h>
+#include <Analyzer/JoinNode.h>
+#include <Analyzer/ListNode.h>
 
 #include <Planner/PlannerContext.h>
 #include <Planner/PlannerActionsVisitor.h>
@@ -33,6 +35,25 @@ public:
 
     void visitImpl(QueryTreeNodePtr & node)
     {
+        /// Special case for a USING clause that contains references to ALIAS columns.
+        /// We cannot modify such a ColumnNode.
+        if (auto * join_node = node->as<JoinNode>())
+        {
+            if (!join_node->isUsingJoinExpression())
+                return;
+
+            auto & using_list = join_node->getJoinExpression()->as<ListNode&>();
+            for (auto & using_element : using_list)
+            {
+                auto & column_node = using_element->as<ColumnNode&>();
+                auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as<ListNode&>().getNodes();
+
+                visitUsingColumn(columns_from_subtrees[0]);
+                visitUsingColumn(columns_from_subtrees[1]);
+            }
+            return;
+        }
+
         auto * column_node = node->as<ColumnNode>();
         if (!column_node)
             return;
@@ -55,7 +76,13 @@ public:
         if (column_node->hasExpression() && column_source_node_type != QueryTreeNodeType::ARRAY_JOIN)
         {
             /// Replace ALIAS column with expression
-            table_expression_data.addAliasColumnName(column_node->getColumnName());
+            bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
+            if (column_already_exists)
+                return;
+
+            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+
+            table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
             node = column_node->getExpression();
             visitImpl(node);
             return;
@@ -78,13 +105,37 @@ public:
         table_expression_data.addColumn(column_node->getColumn(), column_identifier);
     }
 
-    static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node)
+    static bool needChildVisit(const QueryTreeNodePtr & parent, const QueryTreeNodePtr & child_node)
     {
+        if (auto * join_node = parent->as<JoinNode>())
+        {
+            return join_node->getJoinExpression() != child_node || !join_node->isUsingJoinExpression();
+        }
         auto child_node_type = child_node->getNodeType();
         return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
     }
 
 private:
+
+    void visitUsingColumn(QueryTreeNodePtr & node)
+    {
+        auto & column_node = node->as<ColumnNode&>();
+        if (column_node.hasExpression())
+        {
+            auto & table_expression_data = planner_context.getOrCreateTableExpressionData(column_node.getColumnSource());
+            bool column_already_exists = table_expression_data.hasColumn(column_node.getColumnName());
+            if (column_already_exists)
+                return;
+
+            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+            table_expression_data.addAliasColumnName(column_node.getColumnName(), column_identifier);
+
+            visitImpl(column_node.getExpressionOrThrow());
+        }
+        else
+            visitImpl(node);
+    }
+
     PlannerContext & planner_context;
 };
 
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index e2cdf146a69..548c151757e 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -978,6 +978,55 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
     plan_to_add_cast.addStep(std::move(cast_join_columns_step));
 }
 
+struct UsingAliasKeyActions
+{
+    UsingAliasKeyActions(
+        const ColumnsWithTypeAndName & left_plan_output_columns,
+        const ColumnsWithTypeAndName & right_plan_output_columns
+    )
+        : left_alias_columns_keys(std::make_shared<ActionsDAG>(left_plan_output_columns))
+        , right_alias_columns_keys(std::make_shared<ActionsDAG>(right_plan_output_columns))
+    {}
+
+    void addLeftColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context)
+    {
+        addColumnImpl(left_alias_columns_keys, node, plan_output_columns, planner_context);
+    }
+
+    void addRightColumn(QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context)
+    {
+        addColumnImpl(right_alias_columns_keys, node, plan_output_columns, planner_context);
+    }
+
+    ActionsDAGPtr getLeftActions()
+    {
+        left_alias_columns_keys->projectInput();
+        return std::move(left_alias_columns_keys);
+    }
+
+    ActionsDAGPtr getRightActions()
+    {
+        right_alias_columns_keys->projectInput();
+        return std::move(right_alias_columns_keys);
+    }
+
+private:
+    void addColumnImpl(ActionsDAGPtr & alias_columns_keys, QueryTreeNodePtr & node, const ColumnsWithTypeAndName & plan_output_columns, const PlannerContextPtr & planner_context)
+    {
+        auto & column_node = node->as<ColumnNode&>();
+        if (column_node.hasExpression())
+        {
+            auto dag = buildActionsDAGFromExpressionNode(column_node.getExpressionOrThrow(), plan_output_columns, planner_context);
+            const auto & left_inner_column_identifier = planner_context->getColumnNodeIdentifierOrThrow(node);
+            dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs().front(), left_inner_column_identifier));
+            alias_columns_keys->mergeInplace(std::move(*dag));
+        }
+    }
+
+    ActionsDAGPtr left_alias_columns_keys;
+    ActionsDAGPtr right_alias_columns_keys;
+};
+
 JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_expression,
     JoinTreeQueryPlan left_join_tree_query_plan,
     JoinTreeQueryPlan right_join_tree_query_plan,
@@ -1034,6 +1083,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
 
     if (join_node.isUsingJoinExpression())
     {
+        UsingAliasKeyActions using_alias_key_actions{left_plan_output_columns, right_plan_output_columns};
+
         auto & join_node_using_columns_list = join_node.getJoinExpression()->as<ListNode &>();
         for (auto & join_node_using_node : join_node_using_columns_list.getNodes())
         {
@@ -1043,9 +1094,13 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             auto & left_inner_column_node = inner_columns_list.getNodes().at(0);
             auto & left_inner_column = left_inner_column_node->as<ColumnNode &>();
 
+            using_alias_key_actions.addLeftColumn(left_inner_column_node, left_plan_output_columns, planner_context);
+
             auto & right_inner_column_node = inner_columns_list.getNodes().at(1);
             auto & right_inner_column = right_inner_column_node->as<ColumnNode &>();
 
+            using_alias_key_actions.addRightColumn(right_inner_column_node, right_plan_output_columns, planner_context);
+
             const auto & join_node_using_column_node_type = join_node_using_column_node.getColumnType();
             if (!left_inner_column.getColumnType()->equals(*join_node_using_column_node_type))
             {
@@ -1059,6 +1114,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
                 right_plan_column_name_to_cast_type.emplace(right_inner_column_identifier, join_node_using_column_node_type);
             }
         }
+
+        auto left_alias_columns_keys_step = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), using_alias_key_actions.getLeftActions());
+        left_alias_columns_keys_step->setStepDescription("Actions for left table alias column keys");
+        left_plan.addStep(std::move(left_alias_columns_keys_step));
+
+        auto right_alias_columns_keys_step = std::make_unique<ExpressionStep>(right_plan.getCurrentDataStream(), using_alias_key_actions.getRightActions());
+        right_alias_columns_keys_step->setStepDescription("Actions for right table alias column keys");
+        right_plan.addStep(std::move(right_alias_columns_keys_step));
     }
 
     auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map<std::string, DataTypePtr> & plan_column_name_to_cast_type)
diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h
index 9f963dc182a..f6ef4017c98 100644
--- a/src/Planner/TableExpressionData.h
+++ b/src/Planner/TableExpressionData.h
@@ -80,9 +80,11 @@ public:
     }
 
     /// Add alias column name
-    void addAliasColumnName(const std::string & column_name)
+    void addAliasColumnName(const std::string & column_name, const ColumnIdentifier & column_identifier)
     {
         alias_columns_names.insert(column_name);
+
+        column_name_to_column_identifier.emplace(column_name, column_identifier);
     }
 
     /// Get alias columns names
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
index d00491fd7e5..6ed281c757a 100644
--- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.reference
@@ -1 +1,2 @@
 1
+1
diff --git a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
index e4c1fd86b09..7983b43d7e5 100644
--- a/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
+++ b/tests/queries/0_stateless/02955_analyzer_using_functional_args.sql
@@ -6,6 +6,7 @@ INSERT INTO t2 VALUES (6666, 48);
 INSERT INTO t2 VALUES (369, 50);
 
 SELECT count() FROM t1 INNER JOIN t2 USING (y);
+SELECT count() FROM t2 INNER JOIN t1 USING (y);
 
 DROP TABLE IF EXISTS t1;
 DROP TABLE IF EXISTS t2;

From 76f58fb49ebb80b5143435d0de9635280f72c73a Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 11:17:24 +0000
Subject: [PATCH 060/105] Add a comment

---
 src/Planner/PlannerJoinTree.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 548c151757e..774e01839fc 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -978,6 +978,8 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
     plan_to_add_cast.addStep(std::move(cast_join_columns_step));
 }
 
+/// Actions to calculate table columns that have a functional representation (ALIASes and subcolumns)
+/// and are used in the USING clause of a JOIN expression.
 struct UsingAliasKeyActions
 {
     UsingAliasKeyActions(

From 89beb32e646e54012171d4388d874ba8b80fc839 Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Wed, 3 Jan 2024 13:10:14 +0100
Subject: [PATCH 061/105] Edit docs for toWeek()

---
 docs/en/sql-reference/functions/date-time-functions.md | 4 +++-
 docs/ru/sql-reference/functions/date-time-functions.md | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 0261589b968..5622097537e 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1483,7 +1483,9 @@ For mode values with a meaning of “with 4 or more days this year,” weeks are
 
 - Otherwise, it is the last week of the previous year, and the next week is week 1.
 
-For mode values with a meaning of “contains January 1”, the week contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day.
+For mode values with a meaning of “contains January 1”, the week that contains January 1 is week 1.
+It does not matter how many days of the new year the week contains, even if it contains only one day.
+I.e. if the last week of December contains January 1 of the next year, it will be week 1 of the next year.
 
 **Syntax**
 
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index fa5728a097d..cbbb456aa80 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -578,7 +578,9 @@ SELECT
 
 - В противном случае это последняя неделя предыдущего года, а следующая неделя - неделя 1.
 
-Для режимов со значением «содержит 1 января», неделя 1 – это неделя содержащая 1 января. Не имеет значения, сколько дней в новом году содержала неделя, даже если она содержала только один день.
+Для режимов со значением «содержит 1 января», неделя 1 – это неделя, содержащая 1 января. 
+Не имеет значения, сколько дней нового года содержит эта неделя, даже если она содержит только один день. 
+Так, если последняя неделя декабря содержит 1 января следующего года, то она считается неделей 1 следующего года.
 
 **Пример**
 

From be825b129053f1c47762e08142ffebf5761c1df8 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 3 Jan 2024 12:20:08 +0000
Subject: [PATCH 062/105] fix segfault when graphite table does not have agg
 function

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Processors/Merges/Algorithms/Graphite.h   |  7 +-
 .../config/config.d/graphite_alternative.xml  | 24 +++++++
 ...cated_merge_parameters_must_consistent.sql | 70 ++++++++++++++++---
 3 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h
index 692e36d2eae..04bb4548c14 100644
--- a/src/Processors/Merges/Algorithms/Graphite.h
+++ b/src/Processors/Merges/Algorithms/Graphite.h
@@ -127,7 +127,12 @@ struct Pattern
     {
         hash.update(rule_type);
         hash.update(regexp_str);
-        hash.update(function->getName());
+        if (function)
+        {
+            hash.update(function->getName());
+            for (const auto & p : function->getParameters())
+                hash.update(toString(p));
+        }
         for (const auto & r : retentions)
         {
             hash.update(r.age);
diff --git a/tests/config/config.d/graphite_alternative.xml b/tests/config/config.d/graphite_alternative.xml
index 1a00de52af5..6c0bd13ce43 100644
--- a/tests/config/config.d/graphite_alternative.xml
+++ b/tests/config/config.d/graphite_alternative.xml
@@ -26,4 +26,28 @@
             </retention>
         </default>
     </graphite_rollup_alternative>
+    <graphite_rollup_alternative_no_function>
+        <version_column_name>Version</version_column_name>
+        <pattern>
+            <regexp>sum</regexp>
+            <retention>
+                <age>0</age>
+                <precision>600</precision>
+            </retention>
+            <retention>
+                <age>17280</age>
+                <precision>6000</precision>
+            </retention>
+        </pattern>
+        <default>
+            <retention>
+                <age>0</age>
+                <precision>600</precision>
+            </retention>
+            <retention>
+                <age>17280</age>
+                <precision>6000</precision>
+            </retention>
+        </default>
+    </graphite_rollup_alternative_no_function>
 </clickhouse>
diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
index 3c1bec4fb3f..0f452105e6d 100644
--- a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
+++ b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql
@@ -8,13 +8,22 @@ CREATE TABLE t
 ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r1', legacy_ver)
 ORDER BY id;
 
-CREATE TABLE t_r
+CREATE TABLE t_r_ok
+(
+    `id` UInt64,
+    `val` String,
+    `legacy_ver` UInt64,
+)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2', legacy_ver)
+ORDER BY id;
+
+CREATE TABLE t_r_error
 (
     `id` UInt64,
     `val` String,
     `legacy_ver` UInt64
 )
-ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2')
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r3')
 ORDER BY id; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t2
@@ -27,14 +36,24 @@ CREATE TABLE t2
 ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r1', legacy_ver)
 ORDER BY id;
 
-CREATE TABLE t2_r
+CREATE TABLE t2_r_ok
 (
     `id` UInt64,
     `val` String,
     `legacy_ver` UInt64,
     `deleted` UInt8
 )
-ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver, deleted)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver)
+ORDER BY id;
+
+CREATE TABLE t2_r_error
+(
+    `id` UInt64,
+    `val` String,
+    `legacy_ver` UInt64,
+    `deleted` UInt8
+)
+ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r3', legacy_ver, deleted)
 ORDER BY id; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t3
@@ -46,13 +65,23 @@ CREATE TABLE t3
 ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r1', metrics1)
 ORDER BY key;
 
-CREATE TABLE t3_r
+CREATE TABLE t3_r_ok
 (
     `key` UInt64,
     `metrics1` UInt64,
     `metrics2` UInt64
 )
-ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r2', metrics2)
+ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r2', metrics1)
+ORDER BY key;
+
+
+CREATE TABLE t3_r_error
+(
+    `key` UInt64,
+    `metrics1` UInt64,
+    `metrics2` UInt64
+)
+ENGINE = ReplicatedSummingMergeTree('/tables/{database}/t3/', 'r3', metrics2)
 ORDER BY key; -- { serverError METADATA_MISMATCH }
 
 CREATE TABLE t4
@@ -67,7 +96,7 @@ CREATE TABLE t4
 ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r1', 'graphite_rollup')
 ORDER BY key;
 
-CREATE TABLE t4_r
+CREATE TABLE t4_r_ok
 (
     `key` UInt32,
     `Path` String,
@@ -76,5 +105,30 @@ CREATE TABLE t4_r
     `Version` UInt32,
     `col` UInt64
 )
-ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r2', 'graphite_rollup_alternative')
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r2', 'graphite_rollup')
+ORDER BY key;
+
+CREATE TABLE t4_r_error
+(
+    `key` UInt32,
+    `Path` String,
+    `Time` DateTime('UTC'),
+    `Value` Float64,
+    `Version` UInt32,
+    `col` UInt64
+)
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r3', 'graphite_rollup_alternative')
 ORDER BY key; -- { serverError METADATA_MISMATCH }
+
+-- https://github.com/ClickHouse/ClickHouse/issues/58451
+CREATE TABLE t4_r_error_2
+(
+    `key` UInt32,
+    `Path` String,
+    `Time` DateTime('UTC'),
+    `Value` Float64,
+    `Version` UInt32,
+    `col` UInt64
+)
+ENGINE = ReplicatedGraphiteMergeTree('/tables/{database}/t4/', 'r4', 'graphite_rollup_alternative_no_function')
+ORDER BY key; -- { serverError METADATA_MISMATCH }
\ No newline at end of file

From 1f960a32de2f63012fcba3f4cb1b28ebf596d64f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 14:04:30 +0100
Subject: [PATCH 063/105] Fix OSX build

---
 src/Common/iota.cpp                              |  3 +++
 src/Common/iota.h                                | 16 ++++++++++++++--
 src/Common/tests/gtest_hash_table.cpp            |  2 +-
 .../QueryPlan/ReadFromSystemNumbersStep.cpp      |  2 +-
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp
index 385d3b22207..98f18eb195b 100644
--- a/src/Common/iota.cpp
+++ b/src/Common/iota.cpp
@@ -30,4 +30,7 @@ void iota(T * begin, size_t count, T first_value)
 template void iota(UInt8 * begin, size_t count, UInt8 first_value);
 template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 template void iota(UInt64 * begin, size_t count, UInt64 first_value);
+#if defined(OS_DARWIN)
+template void iota(size_t * begin, size_t count, size_t first_value);
+#endif
 }
diff --git a/src/Common/iota.h b/src/Common/iota.h
index 485df4bd4f0..7910274d15d 100644
--- a/src/Common/iota.h
+++ b/src/Common/iota.h
@@ -10,13 +10,25 @@ namespace DB
 {
 
 /// Make sure to add any new type to the extern declaration at the end of the file and instantiate it in iota.cpp
+
 template <typename T>
-concept iota_supported_types = (is_any_of<T, UInt8, UInt32, UInt64>);
+concept iota_supported_types = (is_any_of<
+                                T,
+                                UInt8,
+                                UInt32,
+                                UInt64
+#if defined(OS_DARWIN)
+                                ,
+                                size_t
+#endif
+                                >);
 
 template <iota_supported_types T> void iota(T * begin, size_t count, T first_value);
 
 extern template void iota(UInt8 * begin, size_t count, UInt8 first_value);
 extern template void iota(UInt32 * begin, size_t count, UInt32 first_value);
 extern template void iota(UInt64 * begin, size_t count, UInt64 first_value);
-
+#if defined(OS_DARWIN)
+extern template void iota(size_t * begin, size_t count, size_t first_value);
+#endif
 }
diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp
index ab7c3872170..ae432de7766 100644
--- a/src/Common/tests/gtest_hash_table.cpp
+++ b/src/Common/tests/gtest_hash_table.cpp
@@ -21,7 +21,7 @@ namespace
 std::vector<UInt64> getVectorWithNumbersUpToN(size_t n)
 {
     std::vector<UInt64> res(n);
-    iota(res.data(), res.size(), size_t(0));
+    iota(res.data(), res.size(), UInt64(0));
     return res;
 }
 
diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 329497d66d3..5ccde0ba5bc 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -41,7 +41,7 @@ protected:
         auto column = ColumnUInt64::create(block_size);
         ColumnUInt64::Container & vec = column->getData();
 
-        size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
+        UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class.
         UInt64 * pos = vec.data(); /// This also accelerates the code.
         UInt64 * end = &vec[block_size];
         iota(pos, static_cast<size_t>(end - pos), curr);

From c8acc7c2d1d51c39c3a20dbcebc2eb03d49f0994 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Wed, 3 Jan 2024 14:44:00 +0100
Subject: [PATCH 064/105] Fix build

---
 src/Storages/StorageMaterializedView.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h
index 59f1d5eee1b..934d57d40e2 100644
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@@ -72,7 +72,7 @@ public:
 
     StoragePtr getTargetTable() const;
     StoragePtr tryGetTargetTable() const;
-    StorageID getTargetTableId() const { return target_table_id; }
+    StorageID getTargetTableId() const;
 
     /// Get the virtual column of the target table;
     NamesAndTypesList getVirtuals() const override;

From 1c3364046e50e0c512eb84c58e3ee7e50998469c Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 13:54:17 +0000
Subject: [PATCH 065/105] Fixup

---
 src/Planner/CollectTableExpressionData.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 492120141a4..4c48202af0b 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -77,12 +77,12 @@ public:
         {
             /// Replace ALIAS column with expression
             bool column_already_exists = table_expression_data.hasColumn(column_node->getColumnName());
-            if (column_already_exists)
-                return;
+            if (!column_already_exists)
+            {
+                auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
+                table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
+            }
 
-            auto column_identifier = planner_context.getGlobalPlannerContext()->createColumnIdentifier(node);
-
-            table_expression_data.addAliasColumnName(column_node->getColumnName(), column_identifier);
             node = column_node->getExpression();
             visitImpl(node);
             return;

From 5308e24b8cc9ad3339117e183318e4f372bf43ce Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Wed, 3 Jan 2024 14:12:56 +0000
Subject: [PATCH 066/105] Another fixup + reference update

---
 src/Planner/CollectTableExpressionData.cpp       |  3 ++-
 .../02514_analyzer_drop_join_on.reference        | 16 ++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 4c48202af0b..38c986fd31f 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -109,7 +109,8 @@ public:
     {
         if (auto * join_node = parent->as<JoinNode>())
         {
-            return join_node->getJoinExpression() != child_node || !join_node->isUsingJoinExpression();
+            if (join_node->getJoinExpression() == child_node && join_node->isUsingJoinExpression())
+                return false;
         }
         auto child_node_type = child_node->getNodeType();
         return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION);
diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
index 51e009dcd91..7e94fdf1a42 100644
--- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
+++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference
@@ -55,35 +55,35 @@ Header: a2 String
   Header: a2_0 String
           k_2 UInt64
           d2_1 String
-    Expression (DROP unused columns after JOIN)
+    Expression ((Actions for left table alias column keys + DROP unused columns after JOIN))
     Header: a2_0 String
             k_2 UInt64
       Join (JOIN FillRightFirst)
       Header: a2_0 String
               k_2 UInt64
-        Expression (DROP unused columns after JOIN)
+        Expression ((Actions for left table alias column keys + DROP unused columns after JOIN))
         Header: a2_0 String
                 k_2 UInt64
           Join (JOIN FillRightFirst)
           Header: a2_0 String
                   k_2 UInt64
-            Expression (Change column names to column identifiers)
+            Expression ((Actions for left table alias column keys + Change column names to column identifiers))
             Header: a2_0 String
                     k_2 UInt64
               ReadFromMemoryStorage
               Header: a2 String
                       k UInt64
-            Expression (Change column names to column identifiers)
-            Header: k_3 UInt64
+            Expression ((Actions for right table alias column keys + Change column names to column identifiers))
+            Header: k_5 UInt64
               ReadFromMemoryStorage
               Header: k UInt64
-        Expression (Change column names to column identifiers)
+        Expression ((Actions for right table alias column keys + Change column names to column identifiers))
         Header: k_4 UInt64
           ReadFromMemoryStorage
           Header: k UInt64
-    Expression (Change column names to column identifiers)
+    Expression ((Actions for right table alias column keys + Change column names to column identifiers))
     Header: d2_1 String
-            k_5 UInt64
+            k_3 UInt64
       ReadFromMemoryStorage
       Header: d2 String
               k UInt64

From 31254826314fffb56f02a486297f8ba54a55173d Mon Sep 17 00:00:00 2001
From: Mark Needham <m.h.needham@gmail.com>
Date: Wed, 3 Jan 2024 14:25:03 +0000
Subject: [PATCH 067/105] Add output_format_decimal_trailing_zeros setting for
 trailing zeros

---
 .../functions/rounding-functions.md           | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 84839c2489c..3ede66cf316 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -53,7 +53,7 @@ The rounded number of the same type as the input number.
 **Example of use with Float**
 
 ``` sql
-SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3
+SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
 ```
 
 ``` text
@@ -67,7 +67,22 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3
 **Example of use with Decimal**
 
 ``` sql
-SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3
+SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3;
+```
+
+``` text
+┌───x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐
+│   0 │                                                0 │
+│ 0.5 │                                                1 │
+│   1 │                                                1 │
+└─────┴──────────────────────────────────────────────────┘
+```
+
+If you want to keep the trailing zeros, you need to enable the `output_format_decimal_trailing_zeros` setting:
+
+``` sql
+SELECT cast(number / 2 AS  Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3 settings output_format_decimal_trailing_zeros=1;
+
 ```
 
 ``` text

From 7ee1697971e310d29aa00b4627f415b74b47b748 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:16:45 +0000
Subject: [PATCH 068/105] Reduce setup time of min_max_index.xml

---
 tests/performance/min_max_index.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/min_max_index.xml b/tests/performance/min_max_index.xml
index b7b5d4fb991..518696144e2 100644
--- a/tests/performance/min_max_index.xml
+++ b/tests/performance/min_max_index.xml
@@ -1,7 +1,7 @@
 <test>
     <create_query>CREATE TABLE index_test (z UInt32, INDEX i_x (mortonDecode(2, z).1) TYPE minmax, INDEX i_y (mortonDecode(2, z).2) TYPE minmax) ENGINE = MergeTree ORDER BY z</create_query>
 
-    <fill_query>INSERT INTO index_test SELECT number FROM numbers(0x100000000) WHERE rand() % 3 = 1</fill_query>
+    <fill_query>INSERT INTO index_test SELECT number * 10 FROM numbers_mt(toUInt64(0x100000000 / 10)) SETTINGS max_insert_threads=8</fill_query>
 
     <query><![CDATA[
     SELECT count() FROM index_test WHERE mortonDecode(2, z).1 >= 20000 AND mortonDecode(2, z).1 <= 20100 AND mortonDecode(2, z).2 >= 10000 AND mortonDecode(2, z).2 <= 10100

From b8305e1a6e976cb040454089e57e6db97310d0e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:19:44 +0000
Subject: [PATCH 069/105] Make test more reasonable

---
 tests/performance/group_by_sundy_li.xml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/performance/group_by_sundy_li.xml b/tests/performance/group_by_sundy_li.xml
index 694fafcbbcd..46f659d9cc0 100644
--- a/tests/performance/group_by_sundy_li.xml
+++ b/tests/performance/group_by_sundy_li.xml
@@ -16,10 +16,10 @@
         ORDER BY (d, n)
     </create_query>
 
-    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
-    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(100000000)</fill_query>
+    <fill_query>insert into a select '2000-01-01', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-02', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-03', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
+    <fill_query>insert into a select '2000-01-04', ['aa','bb','cc','dd'][number % 4 + 1], number from numbers_mt(10000000)</fill_query>
 
     <fill_query>OPTIMIZE TABLE a FINAL</fill_query>
 

From 910b3385841297e442f6d349244db0052cc1c3e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:24:15 +0000
Subject: [PATCH 070/105] Reduce polymorphic_parts_m

---
 tests/performance/polymorphic_parts_l.xml | 4 ++--
 tests/performance/polymorphic_parts_m.xml | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml
index d2ae9417bf7..66c5b73caa8 100644
--- a/tests/performance/polymorphic_parts_l.xml
+++ b/tests/performance/polymorphic_parts_l.xml
@@ -25,8 +25,8 @@
     </settings>
 
     <!-- 100 parts -->
-    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM  numbers(100000)</query>
-    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM  numbers(100000)</query>
+    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM numbers(100000)</query>
+    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000)</query>
     <query>INSERT INTO hits_buffer(UserID)  SELECT rand() FROM numbers(100000)</query>
 
     <drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>
diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml
index 54a81def55e..0a44038ffbd 100644
--- a/tests/performance/polymorphic_parts_m.xml
+++ b/tests/performance/polymorphic_parts_m.xml
@@ -25,8 +25,8 @@
     </settings>
 
     <!-- 100 parts -->
-    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM  numbers(10000)</query>
-    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000)</query>
+    <query>INSERT INTO hits_wide(UserID)    SELECT rand() FROM numbers(10000)</query>
+    <query>INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000)</query>
     <query>INSERT INTO hits_buffer(UserID)  SELECT rand() FROM numbers(10000)</query>
 
     <drop_query>DROP TABLE IF EXISTS hits_wide</drop_query>

From c223ae56d33723e52d331a19eb05d70b209792a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:29:30 +0000
Subject: [PATCH 071/105] Reduce the size of decimal_parse

---
 tests/performance/decimal_parse.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/decimal_parse.xml b/tests/performance/decimal_parse.xml
index 19e940b13df..966363d6fec 100644
--- a/tests/performance/decimal_parse.xml
+++ b/tests/performance/decimal_parse.xml
@@ -1,3 +1,3 @@
 <test>
-    <query>SELECT count() FROM zeros(10000000) WHERE NOT ignore(toDecimal32OrZero(toString(rand() % 10000), 5))</query>
+    <query>SELECT count() FROM zeros(3000000) WHERE NOT ignore(toDecimal32OrZero(toString(rand() % 10000), 5))</query>
 </test>

From c1953206123ba0d8337212596ca64cf220365bc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Wed, 3 Jan 2024 17:31:55 +0000
Subject: [PATCH 072/105] Reduce the size of join_used_flags.xml

---
 tests/performance/join_used_flags.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/join_used_flags.xml b/tests/performance/join_used_flags.xml
index cd2073ee106..70b0b45391d 100644
--- a/tests/performance/join_used_flags.xml
+++ b/tests/performance/join_used_flags.xml
@@ -1,6 +1,6 @@
 <test>
     <create_query>CREATE TABLE test_join_used_flags (i64 Int64, i32 Int32) ENGINE = Memory</create_query>
-    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers(20000000)</fill_query>
+    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(3000000)</fill_query>
     <query>SELECT l.i64, r.i64, l.i32, r.i32 FROM test_join_used_flags l RIGHT JOIN test_join_used_flags r USING i64 format Null</query>
     <drop_query>DROP TABLE IF EXISTS test_join_used_flags</drop_query>
 </test>

From d06de83ac14dd8aab015868c84ac341799be7294 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Wed, 3 Jan 2024 17:44:28 +0000
Subject: [PATCH 073/105] Fix KeyCondition for file/url/s3

---
 src/Processors/SourceWithKeyCondition.h       | 34 ++++++++-----------
 src/Storages/StorageFile.cpp                  |  7 ++--
 src/Storages/StorageS3.cpp                    |  7 ++--
 src/Storages/StorageURL.cpp                   |  7 ++--
 src/Storages/VirtualColumnUtils.cpp           | 34 +++++++++++++++++++
 .../02725_parquet_preserve_order.reference    |  4 +--
 6 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/src/Processors/SourceWithKeyCondition.h b/src/Processors/SourceWithKeyCondition.h
index 9e641cc8c51..c9617d3e73e 100644
--- a/src/Processors/SourceWithKeyCondition.h
+++ b/src/Processors/SourceWithKeyCondition.h
@@ -18,31 +18,25 @@ protected:
 
     void setKeyConditionImpl(const SelectQueryInfo & query_info, ContextPtr context, const Block & keys)
     {
-        if (!context->getSettingsRef().allow_experimental_analyzer)
-        {
-            key_condition = std::make_shared<const KeyCondition>(
-                query_info,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
+        key_condition = std::make_shared<const KeyCondition>(
+            query_info,
+            context,
+            keys.getNames(),
+            std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
     }
 
     void setKeyConditionImpl(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context, const Block & keys)
     {
-        if (context->getSettingsRef().allow_experimental_analyzer)
-        {
-            std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column;
-            for (const auto & column : keys.getColumnsWithTypeAndName())
-                node_name_to_input_column.insert({column.name, column});
+        std::unordered_map<std::string, DB::ColumnWithTypeAndName> node_name_to_input_column;
+        for (const auto & column : keys.getColumnsWithTypeAndName())
+            node_name_to_input_column.insert({column.name, column});
 
-            auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context);
-            key_condition = std::make_shared<const KeyCondition>(
-                filter_actions_dag,
-                context,
-                keys.getNames(),
-                std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
-        }
+        auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(nodes, node_name_to_input_column, context);
+        key_condition = std::make_shared<const KeyCondition>(
+            filter_actions_dag,
+            context,
+            keys.getNames(),
+            std::make_shared<ExpressionActions>(std::make_shared<ActionsDAG>(keys.getColumnsWithTypeAndName())));
     }
 
 public:
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 60e06291200..f3917b878d6 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1469,14 +1469,17 @@ void ReadFromFile::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
         if (storage->has_peekable_read_buffer_from_fd.exchange(false))
             read_buffer = std::move(storage->peekable_read_buffer_from_fd);
 
-        pipes.emplace_back(std::make_shared<StorageFileSource>(
+        auto source = std::make_shared<StorageFileSource>(
             info,
             storage,
             context,
             max_block_size,
             files_iterator,
             std::move(read_buffer),
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, context);
+        pipes.emplace_back(std::move(source));
     }
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index b6d96e21e33..ce49be32120 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1255,7 +1255,7 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
     pipes.reserve(num_streams);
     for (size_t i = 0; i < num_streams; ++i)
     {
-        pipes.emplace_back(std::make_shared<StorageS3Source>(
+        auto source = std::make_shared<StorageS3Source>(
             read_from_format_info,
             query_configuration.format,
             storage.getName(),
@@ -1270,7 +1270,10 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
             query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()),
             iterator_wrapper,
             max_parsing_threads,
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, local_context);
+        pipes.emplace_back(std::move(source));
     }
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 9ace7775d4b..c0e4be36202 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -1064,7 +1064,7 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil
 
     for (size_t i = 0; i < num_streams; ++i)
     {
-        pipes.emplace_back(std::make_shared<StorageURLSource>(
+        auto source = std::make_shared<StorageURLSource>(
             info,
             iterator_wrapper,
             storage->getReadMethod(),
@@ -1080,7 +1080,10 @@ void ReadFromURL::initializePipeline(QueryPipelineBuilder & pipeline, const Buil
             storage->headers,
             read_uri_params,
             is_url_with_globs,
-            need_only_count));
+            need_only_count);
+
+        source->setKeyCondition(filter_nodes.nodes, context);
+        pipes.emplace_back(std::move(source));
     }
 
     if (uri_options)
diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index 20e9a5ea174..76138bbea87 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -36,7 +36,10 @@
 #include <Storages/VirtualColumnUtils.h>
 #include <IO/WriteHelpers.h>
 #include <Common/typeid_cast.h>
+#include "Functions/FunctionsLogical.h"
 #include "Functions/IFunction.h"
+#include "Functions/IFunctionAdaptors.h"
+#include "Functions/indexHint.h"
 #include <Parsers/makeASTForLogicalFunction.h>
 #include <Columns/ColumnSet.h>
 #include <Functions/FunctionHelpers.h>
@@ -519,6 +522,37 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
 
             return &node_copy;
         }
+        else if (node->function_base->getName() == "indexHint")
+        {
+            if (const auto * adaptor = typeid_cast<const FunctionToFunctionBaseAdaptor *>(node->function_base.get()))
+            {
+                if (const auto * index_hint = typeid_cast<const FunctionIndexHint *>(adaptor->getFunction().get()))
+                {
+                    auto index_hint_dag = index_hint->getActions()->clone();
+                    ActionsDAG::NodeRawConstPtrs atoms;
+                    for (const auto & output : index_hint_dag->getOutputs())
+                        if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
+                            atoms.push_back(child_copy);
+
+                    if (!atoms.empty())
+                    {
+                        const auto * res = atoms.at(0);
+
+                        if (atoms.size() > 1)
+                        {
+                            FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
+                            res = &index_hint_dag->addFunction(func_builder_and, atoms, {});
+                        }
+
+                        if (!res->result_type->equals(*node->result_type))
+                            res = &index_hint_dag->addCast(*res, node->result_type, {});
+
+                        additional_nodes.splice(additional_nodes.end(), ActionsDAG::detachNodes(std::move(*index_hint_dag)));
+                        return res;
+                    }
+                }
+            }
+        }
     }
 
     if (!canEvaluateSubtree(node, allowed_inputs))
diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.reference b/tests/queries/0_stateless/02725_parquet_preserve_order.reference
index e9c8f99bb33..3f410c13ec4 100644
--- a/tests/queries/0_stateless/02725_parquet_preserve_order.reference
+++ b/tests/queries/0_stateless/02725_parquet_preserve_order.reference
@@ -3,10 +3,10 @@
 2
 (Expression)
 ExpressionTransform
-  (ReadFromStorage)
+  (ReadFromFile)
   File 0 → 1
 (Expression)
 ExpressionTransform × 2
-  (ReadFromStorage)
+  (ReadFromFile)
   Resize 1 → 2
     File 0 → 1

From 66d2db52832a81aea43cda66a500d8b3369547ef Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Thu, 4 Jan 2024 00:27:04 +0100
Subject: [PATCH 074/105] New parallel replicas coordinator implementation
 (#57968)

---
 src/Common/ProfileEvents.cpp                  |  12 +
 src/Core/Settings.h                           |   1 +
 .../ClusterProxy/executeQuery.cpp             |   3 +-
 .../MergeTreeReadPoolParallelReplicas.cpp     |  11 +-
 .../MergeTreeReadPoolParallelReplicas.h       |   1 +
 .../ParallelReplicasReadingCoordinator.cpp    | 810 +++++++++++++-----
 .../ParallelReplicasReadingCoordinator.h      |   4 +-
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  32 +
 .../test.py                                   | 156 ++++
 .../configs/remote_servers.xml                |  22 -
 .../test.py                                   | 156 ----
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  22 -
 .../test.py                                   | 140 ---
 15 files changed, 817 insertions(+), 553 deletions(-)
 rename tests/integration/{test_parallel_replicas_distributed_read_from_all => test_parallel_replicas_all_marks_read}/__init__.py (100%)
 create mode 100644 tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
 create mode 100644 tests/integration/test_parallel_replicas_all_marks_read/test.py
 delete mode 100644 tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
 delete mode 100644 tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/__init__.py
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
 delete mode 100644 tests/integration/test_parallel_replicas_working_set/test.py

diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 4bdf6288a1c..119e0d99143 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -288,6 +288,18 @@ The server successfully detected this situation and will download merged part fr
     M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
     M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
     \
+    M(ParallelReplicasHandleRequestMicroseconds, "Time spent processing requests for marks from replicas") \
+    M(ParallelReplicasHandleAnnouncementMicroseconds, "Time spent processing replicas announcements") \
+    \
+    M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \
+    M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \
+    M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \
+    \
+    M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \
+    M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \
+    M(ParallelReplicasStealingLeftoversMicroseconds, "Time spent collecting orphaned segments") \
+    M(ParallelReplicasCollectingOwnedSegmentsMicroseconds, "Time spent collecting segments meant by hash") \
+    \
     M(PerfCpuCycles, "Total cycles. Be wary of what happens during CPU frequency scaling.")  \
     M(PerfInstructions, "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.") \
     M(PerfCacheReferences, "Cache accesses. Usually, this indicates Last Level Cache accesses, but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.") \
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9516ef72077..4e057861f60 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -185,6 +185,7 @@ class IColumn;
     M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
     M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
     M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
+    M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
     \
     M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \
     \
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 18f7280dd19..c448206ed78 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -412,7 +412,8 @@ void executeQueryWithParallelReplicas(
         new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings, settings.max_parallel_replicas);
     }
 
-    auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount());
+    auto coordinator
+        = std::make_shared<ParallelReplicasReadingCoordinator>(new_cluster->getShardCount(), settings.parallel_replicas_mark_segment_size);
     auto external_tables = new_context->getExternalTables();
     auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
         query_ast,
diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
index e61ddf0d122..69e64d5ea98 100644
--- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
+++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp
@@ -1,5 +1,6 @@
 #include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h>
 
+
 namespace DB
 {
 
@@ -30,12 +31,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
         settings_,
         context_)
     , extension(std::move(extension_))
+    , coordination_mode(CoordinationMode::Default)
 {
-    extension.all_callback(InitialAllRangesAnnouncement(
-        CoordinationMode::Default,
-        parts_ranges.getDescriptions(),
-        extension.number_of_current_replica
-    ));
+    extension.all_callback(
+        InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica));
 }
 
 MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_idx*/, MergeTreeReadTask * previous_task)
@@ -48,7 +47,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id
     if (buffered_ranges.empty())
     {
         auto result = extension.callback(ParallelReadRequest(
-            CoordinationMode::Default,
+            coordination_mode,
             extension.number_of_current_replica,
             pool_settings.min_marks_for_concurrent_read * pool_settings.threads,
             /// For Default coordination mode we don't need to pass part names.
diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
index 08020565ec4..7579a892b67 100644
--- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
+++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h
@@ -31,6 +31,7 @@ private:
     mutable std::mutex mutex;
 
     const ParallelReadingExtension extension;
+    const CoordinationMode coordination_mode;
     RangesInDataPartsDescription buffered_ranges;
     bool no_more_tasks_available{false};
     Poco::Logger * log = &Poco::Logger::get("MergeTreeReadPoolParallelReplicas");
diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
index 333a0590d6b..bbe8c30a5c0 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp
@@ -1,27 +1,77 @@
 #include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
 
 #include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <iterator>
+#include <map>
 #include <mutex>
 #include <numeric>
-#include <vector>
-#include <map>
 #include <set>
-
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
 #include <consistent_hashing.h>
 
-#include "Common/Exception.h"
-#include <Common/logger_useful.h>
-#include <Common/SipHash.h>
-#include <Common/thread_local_rng.h>
-#include <base/types.h>
-#include "IO/WriteBufferFromString.h"
 #include <IO/Progress.h>
-#include "Storages/MergeTree/RangesInDataPart.h"
-#include "Storages/MergeTree/RequestResponse.h"
-#include <Storages/MergeTree/MarkRange.h>
+#include <IO/WriteBufferFromString.h>
 #include <Storages/MergeTree/IntersectionsIndexes.h>
+#include <Storages/MergeTree/MarkRange.h>
+#include <Storages/MergeTree/MergeTreePartInfo.h>
+#include <Storages/MergeTree/RangesInDataPart.h>
+#include <Storages/MergeTree/RequestResponse.h>
+#include <base/defines.h>
+#include <base/types.h>
+#include <boost/algorithm/string/split.hpp>
 #include <fmt/core.h>
 #include <fmt/format.h>
+#include <Common/ElapsedTimeProfileEventIncrement.h>
+#include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
+#include <Common/SipHash.h>
+#include <Common/logger_useful.h>
+#include <Common/thread_local_rng.h>
+
+using namespace DB;
+
+namespace
+{
+size_t roundDownToMultiple(size_t num, size_t multiple)
+{
+    return (num / multiple) * multiple;
+}
+
+size_t
+takeFromRange(const MarkRange & range, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartDescription & result)
+{
+    const auto marks_needed = min_number_of_marks - current_marks_amount;
+    chassert(marks_needed);
+    auto range_we_take = MarkRange{range.begin, range.begin + std::min(marks_needed, range.getNumberOfMarks())};
+    if (!result.ranges.empty() && result.ranges.back().end == range_we_take.begin)
+        /// Can extend the previous range
+        result.ranges.back().end = range_we_take.end;
+    else
+        result.ranges.emplace_back(range_we_take);
+    current_marks_amount += range_we_take.getNumberOfMarks();
+    return range_we_take.getNumberOfMarks();
+}
+}
+
+namespace ProfileEvents
+{
+extern const Event ParallelReplicasHandleRequestMicroseconds;
+extern const Event ParallelReplicasHandleAnnouncementMicroseconds;
+
+extern const Event ParallelReplicasStealingByHashMicroseconds;
+extern const Event ParallelReplicasProcessingPartsMicroseconds;
+extern const Event ParallelReplicasStealingLeftoversMicroseconds;
+extern const Event ParallelReplicasCollectingOwnedSegmentsMicroseconds;
+
+extern const Event ParallelReplicasReadAssignedMarks;
+extern const Event ParallelReplicasReadUnassignedMarks;
+extern const Event ParallelReplicasReadAssignedForStealingMarks;
+}
 
 namespace ProfileEvents
 {
@@ -58,7 +108,8 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
+extern const int BAD_ARGUMENTS;
+extern const int LOGICAL_ERROR;
 }
 
 class ParallelReplicasReadingCoordinator::ImplInterface
@@ -68,6 +119,15 @@ public:
     {
         size_t number_of_requests{0};
         size_t sum_marks{0};
+
+        /// Marks assigned to the given replica by consistent hash
+        size_t assigned_to_me = 0;
+        /// Marks stolen from other replicas
+        size_t stolen_unassigned = 0;
+
+        /// Stolen marks that were assigned for stealing to the given replica by hash. Makes sense only for DefaultCoordinator
+        size_t stolen_by_hash = 0;
+
         bool is_unavailable{false};
     };
     using Stats = std::vector<Stat>;
@@ -76,7 +136,15 @@ public:
         String result = "Statistics: ";
         std::vector<String> stats_by_replica;
         for (size_t i = 0; i < stats.size(); ++i)
-            stats_by_replica.push_back(fmt::format("replica {}{} - {{requests: {} marks: {}}}", i, stats[i].is_unavailable ? " is unavailable" : "", stats[i].number_of_requests, stats[i].sum_marks));
+            stats_by_replica.push_back(fmt::format(
+                "replica {}{} - {{requests: {} marks: {} assigned_to_me: {} stolen_by_hash: {} stolen_unassigned: {}}}",
+                i,
+                stats[i].is_unavailable ? " is unavailable" : "",
+                stats[i].number_of_requests,
+                stats[i].sum_marks,
+                stats[i].assigned_to_me,
+                stats[i].stolen_by_hash,
+                stats[i].stolen_unassigned));
         result += fmt::format("{}", fmt::join(stats_by_replica, "; "));
         return result;
     }
@@ -92,6 +160,7 @@ public:
     {}
 
     virtual ~ImplInterface() = default;
+
     virtual ParallelReadResponse handleRequest(ParallelReadRequest request) = 0;
     virtual void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) = 0;
     virtual void markReplicaAsUnavailable(size_t replica_number) = 0;
@@ -103,165 +172,227 @@ using Parts = std::set<Part>;
 using PartRefs = std::deque<Parts::iterator>;
 
 
+/// This coordinator relies heavily on the fact that we work with a single shard,
+/// i.e. the difference in parts contained in each replica's snapshot is rather negligible (it is only recently inserted or merged parts).
+/// So the guarantees we provide here are basically the same as with single-node reading: we will read from parts as they were seen by some node at the moment when the query started.
+///
+/// Knowing that almost each part could be read by each node, we suppose ranges of each part to be available to all the replicas and thus distribute them evenly between them
+/// (of course we still check if replica has access to the given part before scheduling a reading from it).
+///
+/// Of course we want to distribute marks evenly. Looks like it is better to split parts into reasonably small segments of equal size
+/// (something between 16 and 128 granules i.e. ~100K and ~1M rows should work).
+/// This approach seems to work ok for all three main cases: full scan, reading random sub-ranges and reading only {pre,suf}-fix of parts.
+/// Also we could expect that more granular division will make distribution more even up to a certain point.
 class DefaultCoordinator : public ParallelReplicasReadingCoordinator::ImplInterface
 {
 public:
-    using ParallelReadRequestPtr = std::unique_ptr<ParallelReadRequest>;
-    using PartToMarkRanges = std::map<PartToRead::PartAndProjectionNames, HalfIntervals>;
-
-    explicit DefaultCoordinator(size_t replicas_count_)
+    explicit DefaultCoordinator(size_t replicas_count_, size_t mark_segment_size_)
         : ParallelReplicasReadingCoordinator::ImplInterface(replicas_count_)
-        , reading_state(replicas_count_)
+        , mark_segment_size(mark_segment_size_)
+        , replica_status(replicas_count_)
+        , distribution_by_hash_queue(replicas_count_)
     {
+        if (mark_segment_size == 0)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`");
     }
 
     ~DefaultCoordinator() override;
 
-    struct PartitionReading
-    {
-        PartSegments part_ranges;
-        PartToMarkRanges mark_ranges_in_part;
-    };
+    ParallelReadResponse handleRequest(ParallelReadRequest request) override;
 
-    using PartitionToBlockRanges = std::map<String, PartitionReading>;
-    PartitionToBlockRanges partitions;
+    void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
+
+    void markReplicaAsUnavailable(size_t replica_number) override;
+
+private:
+    /// This many granules will represent a single segment of marks that will be assigned to a replica
+    const size_t mark_segment_size{0};
 
     size_t sent_initial_requests{0};
+    bool state_initialized{false};
+    size_t finished_replicas{0};
 
-    Parts all_parts_to_read;
-    /// Contains only parts which we haven't started to read from
-    PartRefs delayed_parts;
-    /// Per-replica preferred parts split by consistent hash
-    /// Once all task will be done by some replica, it can steal tasks
-    std::vector<PartRefs> reading_state;
+    struct ReplicaStatus
+    {
+        bool is_finished{false};
+        bool is_announcement_received{false};
+    };
+    std::vector<ReplicaStatus> replica_status;
 
     Poco::Logger * log = &Poco::Logger::get("DefaultCoordinator");
 
-    std::atomic<bool> state_initialized{false};
+    /// Workflow of a segment:
+    /// 0. `all_parts_to_read` contains all the parts and thus all the segments initially present there (virtually)
+    /// 1. when we traverse `all_parts_to_read` in selectPartsAndRanges() we either:
+    ///     * take this segment into output
+    ///     * put this segment into `distribution_by_hash_queue` for its owner if it's available and can read from it
+    ///     * otherwise put this segment into `distribution_by_hash_queue` for its stealer_by_hash if it's available and can read from it
+    ///     * otherwise put this segment into `ranges_for_stealing_queue`
+    /// 2. when we traverse `distribution_by_hash_queue` in `selectPartsAndRanges` we either:
+    ///     * take this segment into output
+    ///     * otherwise put this segment into `distribution_by_hash_queue` for its stealer_by_hash if it's available and can read from it
+    ///     * otherwise put this segment into `ranges_for_stealing_queue`
+    /// 3. when we figure out that some replica is unavailable we move all segments from its `distribution_by_hash_queue` to their stealers by hash or to `ranges_for_stealing_queue`
+    /// 4. when we get the announcement from a replica we move all segments it cannot read to their stealers by hash or to `ranges_for_stealing_queue`
+    ///
+    /// So, segments always move in one direction down this path (possibly skipping some stops):
+    /// `all_parts_to_read` -> `distribution_by_hash_queue[owner]` -> `distribution_by_hash_queue[stealer_by_hash]` -> `ranges_for_stealing_queue`
 
-    ParallelReadResponse handleRequest(ParallelReadRequest request) override;
-    void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement) override;
-    void markReplicaAsUnavailable(size_t replica_number) override;
+    /// We take the set of parts announced by this replica as the working set for the whole query.
+    /// For this replica we know for sure that
+    ///     1. it sees all the parts from this set
+    ///     2. it was available at the beginning of execution (since we got its announcement), so if it becomes unavailable at some point - the query will fail with an exception.
+    ///        this means that we can delegate reading of all leftover segments (i.e. segments that were not read by their owner or stealer by hash) to this node
+    size_t source_replica_for_parts_snapshot{0};
 
-    void updateReadingState(InitialAllRangesAnnouncement announcement);
-    void finalizeReadingState();
+    /// Parts view from the first announcement we received
+    std::vector<Part> all_parts_to_read;
 
-    size_t computeConsistentHash(const MergeTreePartInfo & info) const
+    std::unordered_map<std::string, std::unordered_set<size_t>> part_visibility; /// part_name -> set of replicas announced that part
+
+    /// We order parts from biggest (= oldest) to newest and steal from newest. Because we assume
+    /// that they're gonna be merged soon anyway and for them we should already expect worse cache hit.
+    struct BiggerPartsFirst
     {
-        auto hash = SipHash();
-        hash.update(info.getPartNameV1());
-        return ConsistentHashing(hash.get64(), replicas_count);
-    }
+        bool operator()(const auto & lhs, const auto & rhs) const { return lhs.info.getBlocksCount() > rhs.info.getBlocksCount(); }
+    };
 
-    void selectPartsAndRanges(const PartRefs & container, size_t replica_num, size_t min_number_of_marks, size_t & current_mark_size, ParallelReadResponse & response) const;
+    /// We don't precalculate the whole assignment for each node at the start.
+    /// When replica asks coordinator for a new portion of data to read, it traverses `all_parts_to_read` to find ranges relevant to this replica (by consistent hash).
+    /// Many hashes are calculated during this process, and in order not to lose this work we save the information about all these ranges
+    /// observed along the way to what node they belong to.
+    /// Ranges in this queue might belong to a part that the given replica cannot read from - the corresponding check happens later.
+    /// TODO: consider making it bounded in size
+    std::vector<std::multiset<RangesInDataPartDescription, BiggerPartsFirst>> distribution_by_hash_queue;
+
+    /// For some ranges their owner and stealer (by consistent hash) cannot read from the given part at all. So such ranges have to be stolen anyway.
+    /// TODO: consider making it bounded in size
+    RangesInDataPartsDescription ranges_for_stealing_queue;
+
+    /// We take only first replica's set of parts as the whole working set for this query.
+    /// For other replicas we'll just discard parts that they know, but that weren't present in the first request we received.
+    /// The second and all subsequent announcements are needed only to understand if we can schedule reading from the given part to the given replica.
+    void initializeReadingState(InitialAllRangesAnnouncement announcement);
+
+    void setProgressCallback();
+
+    enum class ScanMode
+    {
+        /// Main working set for the replica
+        TakeWhatsMineByHash,
+        /// We need to steal to optimize tail latency, let's do it by hash nevertheless
+        TakeWhatsMineForStealing,
+        /// All bets are off, we need to steal "for correctness" - to not leave any segments unread
+        TakeEverythingAvailable
+    };
+
+    void selectPartsAndRanges(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    size_t computeConsistentHash(const std::string & part_name, size_t segment_begin, ScanMode scan_mode) const;
+
+    void tryToTakeFromDistributionQueue(
+        size_t replica_num, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartsDescription & description);
+
+    void tryToStealFromQueues(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    void tryToStealFromQueue(
+        auto & queue,
+        ssize_t owner, /// In case `queue` is `distribution_by_hash_queue[replica]`
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    void processPartsFurther(
+        size_t replica_num,
+        ScanMode scan_mode,
+        size_t min_number_of_marks,
+        size_t & current_marks_amount,
+        RangesInDataPartsDescription & description);
+
+    bool possiblyCanReadPart(size_t replica, const MergeTreePartInfo & info) const;
+    void enqueueSegment(const MergeTreePartInfo & info, const MarkRange & segment, size_t owner);
+    void enqueueToStealerOrStealingQueue(const MergeTreePartInfo & info, const MarkRange & segment);
 };
 
+
 DefaultCoordinator::~DefaultCoordinator()
 {
-    LOG_DEBUG(log, "Coordination done: {}", toString(stats));
+    try
+    {
+        LOG_DEBUG(log, "Coordination done: {}", toString(stats));
+    }
+    catch (...)
+    {
+        tryLogCurrentException(log);
+    }
 }
 
-void DefaultCoordinator::updateReadingState(InitialAllRangesAnnouncement announcement)
+void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement announcement)
 {
-    PartRefs parts_diff;
-
-    /// To get rid of duplicates
-    for (auto && part_ranges: announcement.description)
+    for (const auto & part : announcement.description)
     {
-        Part part{.description = std::move(part_ranges), .replicas = {announcement.replica_num}};
-        const MergeTreePartInfo & announced_part = part.description.info;
-
-        auto it = std::lower_bound(cbegin(all_parts_to_read), cend(all_parts_to_read), part);
-        if (it != all_parts_to_read.cend())
-        {
-            const MergeTreePartInfo & found_part = it->description.info;
-            if (found_part == announced_part)
-            {
-                /// We have the same part - add the info about presence on current replica
-                it->replicas.insert(announcement.replica_num);
-                continue;
-            }
-            else
-            {
-                /// check if it is covering or covered part
-                /// need to compare with 2 nearest parts in set, - lesser and greater than the part from the announcement
-                bool is_disjoint = found_part.isDisjoint(announced_part);
-                if (it != all_parts_to_read.cbegin() && is_disjoint)
-                {
-                    const MergeTreePartInfo & lesser_part = (--it)->description.info;
-                    is_disjoint &= lesser_part.isDisjoint(announced_part);
-                }
-                if (!is_disjoint)
-                    continue;
-            }
-        }
-        else if (!all_parts_to_read.empty())
-        {
-            /// the announced part is greatest - check if it's disjoint with lesser part
-            const MergeTreePartInfo & lesser_part = all_parts_to_read.crbegin()->description.info;
-            if (!lesser_part.isDisjoint(announced_part))
-                continue;
-        }
-
-        auto [insert_it, _] = all_parts_to_read.emplace(std::move(part));
-        parts_diff.push_back(insert_it);
+        /// We don't really care here if this part will be included into the working set or not
+        part_visibility[part.info.getPartNameV1()].insert(announcement.replica_num);
     }
 
-    /// Split all parts by consistent hash
-    while (!parts_diff.empty())
+    /// If state is already initialized - just register availability info and leave
+    if (state_initialized)
+        return;
+
+    for (auto && part : announcement.description)
     {
-        auto current_part_it = parts_diff.front();
-        parts_diff.pop_front();
-        auto consistent_hash = computeConsistentHash(current_part_it->description.info);
+        auto intersecting_it = std::find_if(
+            all_parts_to_read.begin(),
+            all_parts_to_read.end(),
+            [&part](const Part & other) { return !other.description.info.isDisjoint(part.info); });
 
-        /// Check whether the new part can easy go to replica queue
-        if (current_part_it->replicas.contains(consistent_hash))
-        {
-            reading_state[consistent_hash].emplace_back(current_part_it);
-            continue;
-        }
+        if (intersecting_it != all_parts_to_read.end())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersecting parts found in announcement");
 
-        /// Add to delayed parts
-        delayed_parts.emplace_back(current_part_it);
+        all_parts_to_read.push_back(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
     }
+
+    std::ranges::sort(
+        all_parts_to_read, [](const Part & lhs, const Part & rhs) { return BiggerPartsFirst()(lhs.description, rhs.description); });
+    state_initialized = true;
+    source_replica_for_parts_snapshot = announcement.replica_num;
+
+    LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; "));
 }
 
 void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number)
 {
-    if (stats[replica_number].is_unavailable == false)
+    LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
+
+    ++unavailable_replicas_count;
+    stats[replica_number].is_unavailable = true;
+
+    if (sent_initial_requests == replicas_count - unavailable_replicas_count)
+        setProgressCallback();
+
+    for (const auto & segment : distribution_by_hash_queue[replica_number])
     {
-        LOG_DEBUG(log, "Replica number {} is unavailable", replica_number);
-
-        stats[replica_number].is_unavailable = true;
-        ++unavailable_replicas_count;
-
-        if (sent_initial_requests == replicas_count - unavailable_replicas_count)
-            finalizeReadingState();
+        chassert(segment.ranges.size() == 1);
+        enqueueToStealerOrStealingQueue(segment.info, segment.ranges.front());
     }
+    distribution_by_hash_queue[replica_number].clear();
 }
 
-void DefaultCoordinator::finalizeReadingState()
+void DefaultCoordinator::setProgressCallback()
 {
-    /// Clear all the delayed queue
-    while (!delayed_parts.empty())
-    {
-        auto current_part_it = delayed_parts.front();
-        auto consistent_hash = computeConsistentHash(current_part_it->description.info);
-
-        if (current_part_it->replicas.contains(consistent_hash))
-        {
-            reading_state[consistent_hash].emplace_back(current_part_it);
-            delayed_parts.pop_front();
-            continue;
-        }
-
-        /// In this situation just assign to a random replica which has this part
-        auto replica = *(std::next(current_part_it->replicas.begin(), thread_local_rng() % current_part_it->replicas.size()));
-        reading_state[replica].emplace_back(current_part_it);
-        delayed_parts.pop_front();
-    }
-
-    // update progress with total rows
+    // Update progress with total rows
     if (progress_callback)
     {
         size_t total_rows_to_read = 0;
@@ -274,116 +405,378 @@ void DefaultCoordinator::finalizeReadingState()
 
         LOG_DEBUG(log, "Total rows to read: {}", total_rows_to_read);
     }
-
-    LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; "));
 }
 
-
 void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
 {
     const auto replica_num = announcement.replica_num;
 
-    updateReadingState(std::move(announcement));
+    LOG_DEBUG(log, "Initial request from replica {}: {}", announcement.replica_num, announcement.describe());
+
+    initializeReadingState(std::move(announcement));
 
     if (replica_num >= stats.size())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size());
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", replica_num, stats.size());
 
     ++stats[replica_num].number_of_requests;
+    replica_status[replica_num].is_announcement_received = true;
 
     ++sent_initial_requests;
     LOG_DEBUG(log, "Sent initial requests: {} Replicas count: {}", sent_initial_requests, replicas_count);
+
     if (sent_initial_requests == replicas_count)
-        finalizeReadingState();
-}
+        setProgressCallback();
 
-void DefaultCoordinator::selectPartsAndRanges(const PartRefs & container, size_t replica_num, size_t min_number_of_marks, size_t & current_mark_size, ParallelReadResponse & response) const
-{
-    for (const auto & part : container)
+    /// Sift the queue to move out all invisible segments
+    for (const auto & segment : distribution_by_hash_queue[replica_num])
     {
-        if (current_mark_size >= min_number_of_marks)
+        if (!part_visibility[segment.info.getPartNameV1()].contains(replica_num))
         {
-            LOG_TEST(log, "Current mark size {} is bigger than min_number_marks {}", current_mark_size, min_number_of_marks);
-            break;
-        }
-
-        if (part->description.ranges.empty())
-        {
-            LOG_TEST(log, "Part {} is already empty in reading state", part->description.info.getPartNameV1());
-            continue;
-        }
-
-        if (std::find(part->replicas.begin(), part->replicas.end(), replica_num) == part->replicas.end())
-        {
-            LOG_TEST(log, "Not found part {} on replica {}", part->description.info.getPartNameV1(), replica_num);
-            continue;
-        }
-
-        response.description.push_back({
-            .info = part->description.info,
-            .ranges = {},
-        });
-
-        while (!part->description.ranges.empty() && current_mark_size < min_number_of_marks)
-        {
-            auto & range = part->description.ranges.front();
-            const size_t needed = min_number_of_marks - current_mark_size;
-
-            if (range.getNumberOfMarks() > needed)
-            {
-                auto range_we_take = MarkRange{range.begin, range.begin + needed};
-                response.description.back().ranges.emplace_back(range_we_take);
-                current_mark_size += range_we_take.getNumberOfMarks();
-
-                range.begin += needed;
-                break;
-            }
-
-            response.description.back().ranges.emplace_back(range);
-            current_mark_size += range.getNumberOfMarks();
-            part->description.ranges.pop_front();
+            chassert(segment.ranges.size() == 1);
+            enqueueToStealerOrStealingQueue(segment.info, segment.ranges.front());
         }
     }
 }
 
+void DefaultCoordinator::tryToTakeFromDistributionQueue(
+    size_t replica_num, size_t min_number_of_marks, size_t & current_marks_amount, RangesInDataPartsDescription & description)
+{
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasCollectingOwnedSegmentsMicroseconds);
+
+    auto & distribution_queue = distribution_by_hash_queue[replica_num];
+    auto replica_can_read_part = [&](auto replica, const auto & part) { return part_visibility[part.getPartNameV1()].contains(replica); };
+
+    RangesInDataPartDescription result;
+
+    while (!distribution_queue.empty() && current_marks_amount < min_number_of_marks)
+    {
+        if (result.ranges.empty() || distribution_queue.begin()->info != result.info)
+        {
+            if (!result.ranges.empty())
+                /// We're switching to a different part, so have to save currently accumulated ranges
+                description.push_back(result);
+            result = {.info = distribution_queue.begin()->info};
+        }
+
+        /// NOTE: this works because ranges are not considered by the comparator
+        auto & part_ranges = const_cast<RangesInDataPartDescription &>(*distribution_queue.begin());
+        chassert(part_ranges.ranges.size() == 1);
+        auto & range = part_ranges.ranges.front();
+
+        if (replica_can_read_part(replica_num, part_ranges.info))
+        {
+            if (auto taken = takeFromRange(range, min_number_of_marks, current_marks_amount, result); taken == range.getNumberOfMarks())
+                distribution_queue.erase(distribution_queue.begin());
+            else
+            {
+                range.begin += taken;
+                break;
+            }
+        }
+        else
+        {
+            /// It might be that `replica_num` is the stealer by hash itself - no problem,
+            /// we'll just have a redundant hash computation inside this function
+            enqueueToStealerOrStealingQueue(part_ranges.info, range);
+            distribution_queue.erase(distribution_queue.begin());
+        }
+    }
+
+    if (!result.ranges.empty())
+        description.push_back(result);
+}
+
+void DefaultCoordinator::tryToStealFromQueues(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    auto steal_from_other_replicas = [&]()
+    {
+        /// Try to steal from other replicas starting from replicas with longest queues
+        std::vector<size_t> order(replicas_count);
+        std::iota(order.begin(), order.end(), 0);
+        std::ranges::sort(
+            order, [&](auto lhs, auto rhs) { return distribution_by_hash_queue[lhs].size() > distribution_by_hash_queue[rhs].size(); });
+
+        for (auto replica : order)
+            tryToStealFromQueue(
+                distribution_by_hash_queue[replica],
+                replica,
+                replica_num,
+                scan_mode,
+                min_number_of_marks,
+                current_marks_amount,
+                description);
+    };
+
+    if (scan_mode == ScanMode::TakeWhatsMineForStealing)
+    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasStealingByHashMicroseconds);
+        steal_from_other_replicas();
+    }
+    else
+    {
+        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasStealingLeftoversMicroseconds);
+        /// Check orphaned ranges
+        tryToStealFromQueue(
+            ranges_for_stealing_queue, /*owner=*/-1, replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+        /// Last hope. If we haven't yet figured out that some node is unavailable, its segments are still in its distribution queue.
+        steal_from_other_replicas();
+    }
+}
+
+void DefaultCoordinator::tryToStealFromQueue(
+    auto & queue,
+    ssize_t owner,
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    auto replica_can_read_part = [&](auto replica, const auto & part) { return part_visibility[part.getPartNameV1()].contains(replica); };
+
+    RangesInDataPartDescription result;
+
+    auto it = queue.rbegin();
+    while (it != queue.rend() && current_marks_amount < min_number_of_marks)
+    {
+        auto & part_ranges = const_cast<RangesInDataPartDescription &>(*it);
+        chassert(part_ranges.ranges.size() == 1);
+        auto & range = part_ranges.ranges.front();
+
+        if (result.ranges.empty() || part_ranges.info != result.info)
+        {
+            if (!result.ranges.empty())
+                /// We're switching to a different part, so have to save currently accumulated ranges
+                description.push_back(result);
+            result = {.info = part_ranges.info};
+        }
+
+        if (replica_can_read_part(replica_num, part_ranges.info))
+        {
+            bool can_take = false;
+            if (scan_mode == ScanMode::TakeWhatsMineForStealing)
+            {
+                chassert(owner >= 0);
+                const size_t segment_begin = roundDownToMultiple(range.begin, mark_segment_size);
+                can_take = computeConsistentHash(part_ranges.info.getPartNameV1(), segment_begin, scan_mode) == replica_num;
+            }
+            else
+            {
+                /// Don't steal segments with alive owner that sees them
+                can_take = owner == -1 || stats[owner].is_unavailable || !replica_status[owner].is_announcement_received;
+            }
+            if (can_take)
+            {
+                if (auto taken = takeFromRange(range, min_number_of_marks, current_marks_amount, result); taken == range.getNumberOfMarks())
+                {
+                    it = decltype(it)(queue.erase(std::next(it).base()));
+                    continue;
+                }
+                else
+                    range.begin += taken;
+            }
+        }
+
+        ++it;
+    }
+
+    if (!result.ranges.empty())
+        description.push_back(result);
+}
+
+void DefaultCoordinator::processPartsFurther(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasProcessingPartsMicroseconds);
+
+    for (const auto & part : all_parts_to_read)
+    {
+        if (current_marks_amount >= min_number_of_marks)
+        {
+            LOG_TEST(log, "Current mark size {} is bigger than min_number_marks {}", current_marks_amount, min_number_of_marks);
+            return;
+        }
+
+        RangesInDataPartDescription result{.info = part.description.info};
+
+        while (!part.description.ranges.empty() && current_marks_amount < min_number_of_marks)
+        {
+            auto & range = part.description.ranges.front();
+
+            /// Parts are divided into segments of `mark_segment_size` granules starting from the 0-th granule
+            for (size_t segment_begin = roundDownToMultiple(range.begin, mark_segment_size);
+                 segment_begin < range.end && current_marks_amount < min_number_of_marks;
+                 segment_begin += mark_segment_size)
+            {
+                const auto cur_segment
+                    = MarkRange{std::max(range.begin, segment_begin), std::min(range.end, segment_begin + mark_segment_size)};
+
+                const auto owner = computeConsistentHash(part.description.info.getPartNameV1(), segment_begin, scan_mode);
+                if (owner == replica_num)
+                {
+                    const auto taken = takeFromRange(cur_segment, min_number_of_marks, current_marks_amount, result);
+                    if (taken == range.getNumberOfMarks())
+                        part.description.ranges.pop_front();
+                    else
+                    {
+                        range.begin += taken;
+                        break;
+                    }
+                }
+                else
+                {
+                    chassert(scan_mode == ScanMode::TakeWhatsMineByHash);
+                    enqueueSegment(part.description.info, cur_segment, owner);
+                    range.begin += cur_segment.getNumberOfMarks();
+                    if (range.getNumberOfMarks() == 0)
+                        part.description.ranges.pop_front();
+                }
+            }
+        }
+
+        if (!result.ranges.empty())
+            description.push_back(std::move(result));
+    }
+}
+
+void DefaultCoordinator::selectPartsAndRanges(
+    size_t replica_num,
+    ScanMode scan_mode,
+    size_t min_number_of_marks,
+    size_t & current_marks_amount,
+    RangesInDataPartsDescription & description)
+{
+    if (scan_mode == ScanMode::TakeWhatsMineByHash)
+    {
+        tryToTakeFromDistributionQueue(replica_num, min_number_of_marks, current_marks_amount, description);
+        processPartsFurther(replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+        /// We might back-fill `distribution_by_hash_queue` for this replica in `enqueueToStealerOrStealingQueue`
+        tryToTakeFromDistributionQueue(replica_num, min_number_of_marks, current_marks_amount, description);
+    }
+    else
+        tryToStealFromQueues(replica_num, scan_mode, min_number_of_marks, current_marks_amount, description);
+}
+
+bool DefaultCoordinator::possiblyCanReadPart(size_t replica, const MergeTreePartInfo & info) const
+{
+    /// At this point we might not be sure if `replica` can read from the given part (its announcement may not have arrived yet).
+    /// In that case we will check it while processing `replica`'s data requests - they are guaranteed to come after the announcement.
+    return !stats[replica].is_unavailable && !replica_status[replica].is_finished
+        && (!replica_status[replica].is_announcement_received || part_visibility.at(info.getPartNameV1()).contains(replica));
+}
+
+void DefaultCoordinator::enqueueSegment(const MergeTreePartInfo & info, const MarkRange & segment, size_t owner)
+{
+    if (possiblyCanReadPart(owner, info))
+    {
+        /// TODO: optimize me (maybe we can store something lighter than RangesInDataPartDescription)
+        distribution_by_hash_queue[owner].insert(RangesInDataPartDescription{.info = info, .ranges = {segment}});
+        LOG_TEST(log, "Segment {} is added to its owner's ({}) queue", segment, owner);
+    }
+    else
+        enqueueToStealerOrStealingQueue(info, segment);
+}
+
+void DefaultCoordinator::enqueueToStealerOrStealingQueue(const MergeTreePartInfo & info, const MarkRange & segment)
+{
+    auto && range = RangesInDataPartDescription{.info = info, .ranges = {segment}};
+    const auto stealer_by_hash = computeConsistentHash(
+        info.getPartNameV1(), roundDownToMultiple(segment.begin, mark_segment_size), ScanMode::TakeWhatsMineForStealing);
+    if (possiblyCanReadPart(stealer_by_hash, info))
+    {
+        distribution_by_hash_queue[stealer_by_hash].insert(std::move(range));
+        LOG_TEST(log, "Segment {} is added to its stealer's ({}) queue", segment, stealer_by_hash);
+    }
+    else
+    {
+        ranges_for_stealing_queue.push_back(std::move(range));
+        LOG_TEST(log, "Segment {} is added to stealing queue", segment);
+    }
+}
+
+size_t DefaultCoordinator::computeConsistentHash(const std::string & part_name, size_t segment_begin, ScanMode scan_mode) const
+{
+    chassert(segment_begin % mark_segment_size == 0);
+    auto hash = SipHash();
+    hash.update(part_name);
+    hash.update(segment_begin);
+    hash.update(scan_mode);
+    return ConsistentHashing(hash.get64(), replicas_count);
+}
+
 ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest request)
 {
     LOG_TRACE(log, "Handling request from replica {}, minimal marks size is {}", request.replica_num, request.min_number_of_marks);
 
-    size_t current_mark_size = 0;
     ParallelReadResponse response;
 
-    /// 1. Try to select from preferred set of parts for current replica
-    selectPartsAndRanges(reading_state[request.replica_num], request.replica_num, request.min_number_of_marks, current_mark_size, response);
+    size_t current_mark_size = 0;
 
-    /// 2. Try to use parts from delayed queue
-    while (!delayed_parts.empty() && current_mark_size < request.min_number_of_marks)
-    {
-        auto part = delayed_parts.front();
-        delayed_parts.pop_front();
-        reading_state[request.replica_num].emplace_back(part);
-        selectPartsAndRanges(reading_state[request.replica_num], request.replica_num, request.min_number_of_marks, current_mark_size, response);
-    }
+    /// 1. Try to select ranges meant for this replica by consistent hash
+    selectPartsAndRanges(
+        request.replica_num, ScanMode::TakeWhatsMineByHash, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t assigned_to_me = current_mark_size;
 
-    /// 3. Try to steal tasks;
-    if (current_mark_size < request.min_number_of_marks)
-    {
-        for (size_t i = 0; i < replicas_count; ++i)
-        {
-            if (i != request.replica_num)
-                selectPartsAndRanges(reading_state[i], request.replica_num, request.min_number_of_marks, current_mark_size, response);
+    /// 2. Try to steal but with caching again (with different key)
+    selectPartsAndRanges(
+        request.replica_num, ScanMode::TakeWhatsMineForStealing, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t stolen_by_hash = current_mark_size - assigned_to_me;
 
-            if (current_mark_size >= request.min_number_of_marks)
-                break;
-        }
-    }
+    /// 3. Try to steal with no preference. We're trying to postpone it as much as possible.
+    if (current_mark_size == 0 && request.replica_num == source_replica_for_parts_snapshot)
+        selectPartsAndRanges(
+            request.replica_num, ScanMode::TakeEverythingAvailable, request.min_number_of_marks, current_mark_size, response.description);
+    const size_t stolen_unassigned = current_mark_size - stolen_by_hash - assigned_to_me;
 
     stats[request.replica_num].number_of_requests += 1;
     stats[request.replica_num].sum_marks += current_mark_size;
 
+    stats[request.replica_num].assigned_to_me += assigned_to_me;
+    stats[request.replica_num].stolen_by_hash += stolen_by_hash;
+    stats[request.replica_num].stolen_unassigned += stolen_unassigned;
+
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadAssignedMarks, assigned_to_me);
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadUnassignedMarks, stolen_unassigned);
+    ProfileEvents::increment(ProfileEvents::ParallelReplicasReadAssignedForStealingMarks, stolen_by_hash);
+
     if (response.description.empty())
+    {
         response.finish = true;
 
-    LOG_TRACE(log, "Going to respond to replica {} with {}", request.replica_num, response.describe());
+        replica_status[request.replica_num].is_finished = true;
+
+        if (++finished_replicas == replicas_count - unavailable_replicas_count)
+        {
+            /// Nobody will come to process any more data
+
+            if (!ranges_for_stealing_queue.empty())
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Some orphaned segments were left unread");
+
+            for (size_t replica = 0; replica < replicas_count; ++replica)
+                if (!distribution_by_hash_queue[replica].empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty distribution_by_hash_queue for replica {}", replica);
+        }
+    }
+
+    LOG_DEBUG(
+        log,
+        "Going to respond to replica {} with {}; mine_marks={}, stolen_by_hash={}, stolen_rest={}",
+        request.replica_num,
+        response.describe(),
+        assigned_to_me,
+        stolen_by_hash,
+        stolen_unassigned);
+
     return response;
 }
 
@@ -456,6 +849,8 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
         std::sort(ranges.begin(), ranges.end());
     }
 
+    ++stats[announcement.replica_num].number_of_requests;
+
     if (new_rows_to_read > 0)
     {
         Progress progress;
@@ -557,6 +952,8 @@ ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest
 
 void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
 {
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasHandleAnnouncementMicroseconds);
+
     std::lock_guard lock(mutex);
 
     if (!pimpl)
@@ -570,6 +967,8 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init
 
 ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request)
 {
+    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds);
+
     std::lock_guard lock(mutex);
 
     if (!pimpl)
@@ -604,7 +1003,7 @@ void ParallelReplicasReadingCoordinator::initialize()
     switch (mode)
     {
         case CoordinationMode::Default:
-            pimpl = std::make_unique<DefaultCoordinator>(replicas_count);
+            pimpl = std::make_unique<DefaultCoordinator>(replicas_count, mark_segment_size);
             break;
         case CoordinationMode::WithOrder:
             pimpl = std::make_unique<InOrderCoordinator<CoordinationMode::WithOrder>>(replicas_count);
@@ -621,7 +1020,10 @@ void ParallelReplicasReadingCoordinator::initialize()
         pimpl->markReplicaAsUnavailable(replica);
 }
 
-ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_) {}
+ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_)
+    : replicas_count(replicas_count_), mark_segment_size(mark_segment_size_)
+{
+}
 
 ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default;
 
diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
index acc265c124f..9cba7d8e8c2 100644
--- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
+++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h
@@ -15,7 +15,7 @@ class ParallelReplicasReadingCoordinator
 public:
     class ImplInterface;
 
-    explicit ParallelReplicasReadingCoordinator(size_t replicas_count_);
+    explicit ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_ = 0);
     ~ParallelReplicasReadingCoordinator();
 
     void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement);
@@ -35,8 +35,8 @@ private:
 
     std::mutex mutex;
     size_t replicas_count{0};
+    size_t mark_segment_size{0};
     CoordinationMode mode{CoordinationMode::Default};
-    std::atomic<bool> initialized{false};
     std::unique_ptr<ImplInterface> pimpl;
     ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation
     std::set<size_t> replicas_used;
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/__init__.py b/tests/integration/test_parallel_replicas_all_marks_read/__init__.py
similarity index 100%
rename from tests/integration/test_parallel_replicas_distributed_read_from_all/__init__.py
rename to tests/integration/test_parallel_replicas_all_marks_read/__init__.py
diff --git a/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
new file mode 100644
index 00000000000..1ad562334f5
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_all_marks_read/configs/remote_servers.xml
@@ -0,0 +1,32 @@
+<clickhouse>
+    <remote_servers>
+        <parallel_replicas_with_unavailable_nodes>
+            <shard>
+                <replica>
+                    <host>node0</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node3</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node4</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node5</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </parallel_replicas_with_unavailable_nodes>
+    </remote_servers>
+</clickhouse>
diff --git a/tests/integration/test_parallel_replicas_all_marks_read/test.py b/tests/integration/test_parallel_replicas_all_marks_read/test.py
new file mode 100644
index 00000000000..7776ccb0c09
--- /dev/null
+++ b/tests/integration/test_parallel_replicas_all_marks_read/test.py
@@ -0,0 +1,156 @@
+import json
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from random import randint
+
+cluster = ClickHouseCluster(__file__)
+cluster_name = "parallel_replicas_with_unavailable_nodes"
+
+nodes = [
+    cluster.add_instance(
+        f"node{num}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
+    )
+    for num in range(3)
+]
+
+
+@pytest.fixture(scope="module", autouse=True)
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def _create_tables(table_name, table_size, index_granularity):
+    for num in range(len(nodes)):
+        nodes[num].query(f"DROP TABLE IF EXISTS {table_name}")
+
+        nodes[num].query(
+            f"""
+            CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String)
+            Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', '{num}')
+            ORDER BY (key)
+            SETTINGS index_granularity = {index_granularity}
+            """
+        )
+
+    nodes[0].query(
+        f"""
+        INSERT INTO {table_name}
+        SELECT number, toString(number) FROM numbers_mt({table_size})
+        """
+    )
+
+
+def _create_query(query_tmpl, table_name):
+    rand_set = [randint(0, 500) for i in range(42)]
+    return query_tmpl.format(table_name=table_name, rand_set=rand_set)
+
+
+def _get_result_without_parallel_replicas(query):
+    return nodes[0].query(
+        query,
+        settings={
+            "allow_experimental_parallel_reading_from_replicas": 0,
+        },
+    )
+
+
+def _get_result_with_parallel_replicas(
+    query, query_id, cluster_name, parallel_replicas_mark_segment_size
+):
+    return nodes[0].query(
+        query,
+        settings={
+            "allow_experimental_parallel_reading_from_replicas": 2,
+            "max_parallel_replicas": 6,
+            "cluster_for_parallel_replicas": f"{cluster_name}",
+            "parallel_replicas_mark_segment_size": parallel_replicas_mark_segment_size,
+            "query_id": query_id,
+        },
+    )
+
+
+def _get_expected_amount_of_marks_to_read(query):
+    return json.loads(
+        nodes[0].query(
+            f"""
+            EXPLAIN ESTIMATE
+            {query}
+            FORMAT JSONEachRow
+            """
+        )
+    )["marks"]
+
+
+def _get_number_of_marks_read_by_replicas(query_id):
+    nodes[0].query("SYSTEM FLUSH LOGS")
+    return (
+        nodes[0]
+        .query(
+            f"""
+                SELECT sum(
+                    ProfileEvents['ParallelReplicasReadAssignedMarks']
+                    + ProfileEvents['ParallelReplicasReadUnassignedMarks']
+                    + ProfileEvents['ParallelReplicasReadAssignedForStealingMarks']
+                )
+                FROM system.query_log
+                WHERE query_id = '{query_id}'
+                """
+        )
+        .strip()
+    )
+
+
+@pytest.mark.parametrize(
+    "query_tmpl",
+    [
+        "SELECT sum(cityHash64(*)) FROM {table_name}",
+        "SELECT sum(cityHash64(*)) FROM {table_name} WHERE intDiv(key, 100) IN {rand_set}",
+    ],
+)
+@pytest.mark.parametrize(
+    "table_size",
+    [1000, 10000, 100000],
+)
+@pytest.mark.parametrize(
+    "index_granularity",
+    [10, 100],
+)
+@pytest.mark.parametrize(
+    "parallel_replicas_mark_segment_size",
+    [1, 10],
+)
+def test_number_of_marks_read(
+    start_cluster,
+    query_tmpl,
+    table_size,
+    index_granularity,
+    parallel_replicas_mark_segment_size,
+):
+    if nodes[0].is_built_with_sanitizer():
+        pytest.skip("Disabled for sanitizers (too slow)")
+
+    table_name = f"tbl_{len(query_tmpl)}_{cluster_name}_{table_size}_{index_granularity}_{parallel_replicas_mark_segment_size}"
+    _create_tables(table_name, table_size, index_granularity)
+
+    if "where" in query_tmpl.lower():
+        # We need all the replicas to see the same state of parts to make sure that index analysis will pick the same amount of marks for reading
+        # regardless of which replica's state will be chosen as the working set. This should become redundant once we start to always use initiator's snapshot.
+        nodes[0].query(f"OPTIMIZE TABLE {table_name} FINAL", settings={"alter_sync": 2})
+        for node in nodes:
+            node.query(f"SYSTEM SYNC REPLICA {table_name} STRICT")
+
+    query = _create_query(query_tmpl, table_name)
+    query_id = f"{table_name}_{randint(0, 10**9)}"
+
+    assert _get_result_with_parallel_replicas(
+        query, query_id, cluster_name, parallel_replicas_mark_segment_size
+    ) == _get_result_without_parallel_replicas(query)
+
+    assert _get_number_of_marks_read_by_replicas(
+        query_id
+    ) == _get_expected_amount_of_marks_to_read(query)
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
deleted file mode 100644
index 02a315479f8..00000000000
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/configs/remote_servers.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<clickhouse>
-    <remote_servers>
-        <test_single_shard_multiple_replicas>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>n1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_single_shard_multiple_replicas>
-    </remote_servers>
-</clickhouse>
-
diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
deleted file mode 100644
index 8af7bb12595..00000000000
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ /dev/null
@@ -1,156 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-nodes = [
-    cluster.add_instance(
-        f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
-    )
-    for i in (1, 2, 3)
-]
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def create_tables(cluster, table_name):
-    """create replicated tables in special way
-    - each table is populated by equal number of rows
-    - fetches are disabled, so each replica will have different set of rows
-      which enforce parallel replicas read from each replica
-    """
-
-    # create replicated tables
-    for node in nodes:
-        node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
-
-    nodes[0].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1')
-            ORDER BY (key)"""
-    )
-    nodes[1].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2')
-            ORDER BY (key)"""
-    )
-    nodes[2].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
-            ORDER BY (key)"""
-    )
-    # stop merges
-    nodes[0].query(f"system stop merges {table_name}")
-    nodes[1].query(f"system stop merges {table_name}")
-    nodes[2].query(f"system stop merges {table_name}")
-    # stop fetches
-    nodes[0].query(f"system stop fetches {table_name}")
-    nodes[1].query(f"system stop fetches {table_name}")
-    nodes[2].query(f"system stop fetches {table_name}")
-
-    # create distributed table
-    nodes[0].query(f"DROP TABLE IF EXISTS {table_name}_d SYNC")
-    nodes[0].query(
-        f"""
-            CREATE TABLE {table_name}_d AS {table_name}
-            Engine=Distributed(
-                {cluster},
-                currentDatabase(),
-                {table_name},
-                rand()
-            )
-            """
-    )
-
-    # populate data, equal number of rows for each replica
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)",
-        settings={"distributed_foreground_insert": 1},
-    )
-
-    return "60\t0\t59\t1770\n"
-
-
-@pytest.mark.parametrize(
-    "prefer_localhost_replica",
-    [
-        pytest.param(0),
-        pytest.param(1),
-    ],
-)
-def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica):
-    """create and populate table in special way (see create_table()),
-    so parallel replicas will read equal number of rows from each replica
-    """
-
-    cluster = "test_single_shard_multiple_replicas"
-    table_name = "test_table"
-    expected_result = create_tables(cluster, table_name)
-
-    # parallel replicas
-    assert (
-        nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 2,
-                "prefer_localhost_replica": prefer_localhost_replica,
-                "max_parallel_replicas": 3,
-            },
-        )
-        == expected_result
-    )
-
-    # check logs for coordinator statistic
-    for n in nodes:
-        n.query("SYSTEM FLUSH LOGS")
-
-    # each replica has 2 distinct parts (non-intersecting with another replicas),
-    # each part less then index granularity, therefore 2 marks for each replica to handle
-    coordinator_statistic = "replica 0 - {requests: 3 marks: 2}; replica 1 - {requests: 3 marks: 2}; replica 2 - {requests: 3 marks: 2}"
-    assert (
-        nodes[0].contains_in_log(coordinator_statistic)
-        or nodes[1].contains_in_log(coordinator_statistic)
-        or nodes[2].contains_in_log(coordinator_statistic)
-    )
-
-    # w/o parallel replicas
-    # start fetches back, otherwise the result will be not as expected
-    nodes[0].query(f"system start fetches {table_name}")
-    nodes[1].query(f"system start fetches {table_name}")
-    nodes[2].query(f"system start fetches {table_name}")
-    # ensure that replica in sync before querying it to get stable result
-    nodes[0].query(f"system start merges {table_name}")
-    nodes[0].query(f"system sync  replica {table_name}")
-    assert (
-        nodes[0].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 0,
-            },
-        )
-        == expected_result
-    )
diff --git a/tests/integration/test_parallel_replicas_working_set/__init__.py b/tests/integration/test_parallel_replicas_working_set/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml b/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
deleted file mode 100644
index 02a315479f8..00000000000
--- a/tests/integration/test_parallel_replicas_working_set/configs/remote_servers.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<clickhouse>
-    <remote_servers>
-        <test_single_shard_multiple_replicas>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>n1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>n3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_single_shard_multiple_replicas>
-    </remote_servers>
-</clickhouse>
-
diff --git a/tests/integration/test_parallel_replicas_working_set/test.py b/tests/integration/test_parallel_replicas_working_set/test.py
deleted file mode 100644
index 0ede9d9b1a5..00000000000
--- a/tests/integration/test_parallel_replicas_working_set/test.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import pytest
-from helpers.cluster import ClickHouseCluster
-
-cluster = ClickHouseCluster(__file__)
-
-nodes = [
-    cluster.add_instance(
-        f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
-    )
-    for i in (1, 2, 3)
-]
-
-
-@pytest.fixture(scope="module", autouse=True)
-def start_cluster():
-    try:
-        cluster.start()
-        yield cluster
-    finally:
-        cluster.shutdown()
-
-
-def create_tables(cluster, table_name, node_with_covering_part):
-    # create replicated tables
-    for node in nodes:
-        node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")
-
-    nodes[0].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r1')
-            ORDER BY (key)"""
-    )
-    nodes[1].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r2')
-            ORDER BY (key)"""
-    )
-    nodes[2].query(
-        f"""CREATE TABLE IF NOT EXISTS {table_name} (key Int64, value String) Engine=ReplicatedMergeTree('/test_parallel_replicas/shard1/{table_name}', 'r3')
-            ORDER BY (key)"""
-    )
-    # stop merges to keep original parts
-    # stop fetches to keep only parts created on the nodes
-    for i in (0, 1, 2):
-        if i != node_with_covering_part:
-            nodes[i].query(f"system stop fetches {table_name}")
-            nodes[i].query(f"system stop merges {table_name}")
-
-    # populate data, equal number of rows for each replica
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10)",
-    )
-    nodes[0].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(10, 10)"
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(20, 10)"
-    )
-    nodes[1].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(30, 10)"
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(40, 10)"
-    )
-    nodes[2].query(
-        f"INSERT INTO {table_name} SELECT number, number FROM numbers(50, 10)"
-    )
-    nodes[node_with_covering_part].query(f"system sync replica {table_name}")
-    nodes[node_with_covering_part].query(f"optimize table {table_name}")
-
-    # check we have expected set of parts
-    expected_active_parts = ""
-    if node_with_covering_part == 0:
-        expected_active_parts = (
-            "all_0_5_1\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
-        )
-
-    if node_with_covering_part == 1:
-        expected_active_parts = (
-            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_4_4_0\nall_5_5_0\n"
-        )
-
-    if node_with_covering_part == 2:
-        expected_active_parts = (
-            "all_0_0_0\nall_0_5_1\nall_1_1_0\nall_2_2_0\nall_3_3_0\n"
-        )
-
-    assert (
-        nodes[0].query(
-            f"select distinct name from clusterAllReplicas({cluster}, system.parts) where table='{table_name}' and active order by name"
-        )
-        == expected_active_parts
-    )
-
-
-@pytest.mark.parametrize("node_with_covering_part", [0, 1, 2])
-def test_covering_part_in_announcement(start_cluster, node_with_covering_part):
-    """create and populate table in special way (see create_table()),
-    node_with_covering_part contains all parts merged into one,
-    other nodes contain only parts which are result of insert via the node
-    """
-
-    cluster = "test_single_shard_multiple_replicas"
-    table_name = "test_table"
-    create_tables(cluster, table_name, node_with_covering_part)
-
-    # query result can be one of the following outcomes
-    # (1) query result if parallel replicas working set contains all_0_5_1
-    expected_full_result = "60\t0\t59\t1770\n"
-    expected_results = {expected_full_result}
-
-    # (2) query result if parallel replicas working set DOESN'T contain all_0_5_1
-    if node_with_covering_part == 0:
-        expected_results.add("40\t20\t59\t1580\n")
-    if node_with_covering_part == 1:
-        expected_results.add("40\t0\t59\t1180\n")
-    if node_with_covering_part == 2:
-        expected_results.add("40\t0\t39\t780\n")
-
-    # parallel replicas
-    result = nodes[0].query(
-        f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
-        settings={
-            "allow_experimental_parallel_reading_from_replicas": 2,
-            "prefer_localhost_replica": 0,
-            "max_parallel_replicas": 3,
-            "use_hedged_requests": 0,
-            "cluster_for_parallel_replicas": cluster,
-        },
-    )
-    assert result in expected_results
-
-    # w/o parallel replicas
-    assert (
-        nodes[node_with_covering_part].query(
-            f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}",
-            settings={
-                "allow_experimental_parallel_reading_from_replicas": 0,
-            },
-        )
-        == expected_full_result
-    )

From 3c7ae2f171bb8bf56d04677448a6ab0384f865a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:20:07 +0000
Subject: [PATCH 075/105] Reduce bounding_ratio.xml

---
 tests/performance/bounding_ratio.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/bounding_ratio.xml b/tests/performance/bounding_ratio.xml
index e3a15f90013..ed0b25848df 100644
--- a/tests/performance/bounding_ratio.xml
+++ b/tests/performance/bounding_ratio.xml
@@ -1,4 +1,4 @@
 <test>
-    <query>SELECT boundingRatio(number, number) FROM numbers(100000000)</query>
-    <query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(100000000)</query>
+    <query>SELECT boundingRatio(number, number) FROM numbers(30000000)</query>
+    <query>SELECT (argMax(number, number) - argMin(number, number)) / (max(number) - min(number)) FROM numbers(30000000)</query>
 </test>

From 39eaa8dc9cd599b337a091dafa8cd3bb020e1b47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:24:36 +0000
Subject: [PATCH 076/105] Halve the size of reinterpret_as.xml

---
 tests/performance/reinterpret_as.xml | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml
index dbf6df160ed..d05ef3bb038 100644
--- a/tests/performance/reinterpret_as.xml
+++ b/tests/performance/reinterpret_as.xml
@@ -19,7 +19,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -38,7 +38,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -57,7 +57,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -76,7 +76,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -95,7 +95,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(10000000)
+        FROM numbers_mt(5000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -115,7 +115,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -134,7 +134,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -153,7 +153,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -172,7 +172,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(200000000)
+        FROM numbers_mt(100000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -191,7 +191,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(100000000)
+        FROM numbers_mt(50000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -210,7 +210,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(10000000)
+        FROM numbers_mt(5000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -230,7 +230,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(20000000)
+        FROM numbers_mt(10000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>
@@ -249,7 +249,7 @@
             toInt256(number) as d,
             toString(number) as f,
             toFixedString(f, 20) as g
-        FROM numbers_mt(100000000)
+        FROM numbers_mt(50000000)
         SETTINGS max_threads = 8
         FORMAT Null
     </query>

From 2aa6690f2c63c4630c04b6cae54e0fdbb8b12082 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:29:17 +0000
Subject: [PATCH 077/105] Reduce hashed_dictionary.xml

---
 tests/performance/hashed_dictionary.xml | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml
index e9038e694c6..b9de02a70e0 100644
--- a/tests/performance/hashed_dictionary.xml
+++ b/tests/performance/hashed_dictionary.xml
@@ -82,7 +82,6 @@
             <name>elements_count</name>
             <values>
                 <value>5000000</value>
-                <value>7500000</value>
             </values>
         </substitution>
     </substitutions>
@@ -90,16 +89,14 @@
     <query>
         WITH rand64() % toUInt64({elements_count}) as key
         SELECT dictGet('default.simple_key_hashed_dictionary', {column_name}, key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
     <query>
         WITH rand64() % toUInt64({elements_count}) as key
         SELECT dictHas('default.simple_key_hashed_dictionary', key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
@@ -111,16 +108,14 @@
     <query>
         WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
         SELECT dictGet('default.complex_key_hashed_dictionary', {column_name}, key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 
     <query>
         WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key
         SELECT dictHas('default.complex_key_hashed_dictionary', key)
-        FROM system.numbers
-        LIMIT {elements_count}
+        FROM numbers_mt({elements_count})
         FORMAT Null;
     </query>
 

From 1d1edd5b57b6f6cf188c6c616d09b374a9144268 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:31:20 +0000
Subject: [PATCH 078/105] Reduce sum_map.xml

---
 tests/performance/sum_map.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml
index f55af077023..ffb9b9507ae 100644
--- a/tests/performance/sum_map.xml
+++ b/tests/performance/sum_map.xml
@@ -7,7 +7,7 @@
         <substitution>
            <name>scale</name>
            <values>
-               <value>1000000</value>
+               <value>100000</value>
            </values>
        </substitution>
         <substitution>

From 641caba5b0d1caf6a4146c769ee3af6b55bd8899 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 11:36:33 +0000
Subject: [PATCH 079/105] Adapt more tests

---
 tests/performance/group_by_fixed_keys.xml | 2 +-
 tests/performance/join_used_flags.xml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/performance/group_by_fixed_keys.xml b/tests/performance/group_by_fixed_keys.xml
index a64208eb3de..d74b65ad47a 100644
--- a/tests/performance/group_by_fixed_keys.xml
+++ b/tests/performance/group_by_fixed_keys.xml
@@ -11,7 +11,7 @@
 
     <create_query>create table group_by_fk(a UInt32, b UInt32, c LowCardinality(UInt32), d Nullable(UInt32), e UInt64, f UInt64, g UInt64, h LowCardinality(UInt64), i Nullable(UInt64)) engine=MergeTree order by tuple()</create_query>
 
-    <fill_query>insert into group_by_fk select number, number, number % 10000, number % 2 == 0 ? number : Null, number, number, number, number % 10000, number % 2 == 0 ? number : Null from numbers_mt(3e7)</fill_query>
+    <fill_query>insert into group_by_fk select number, number, number % 10000, number % 2 == 0 ? number : Null, number, number, number, number % 10000, number % 2 == 0 ? number : Null from numbers_mt(1e7) settings max_insert_threads=8</fill_query>
 
     <!-- keys64_two_level -->
     <query>select a, b from group_by_fk group by a, b format Null</query>
diff --git a/tests/performance/join_used_flags.xml b/tests/performance/join_used_flags.xml
index 70b0b45391d..1bb994f7be2 100644
--- a/tests/performance/join_used_flags.xml
+++ b/tests/performance/join_used_flags.xml
@@ -1,6 +1,6 @@
 <test>
     <create_query>CREATE TABLE test_join_used_flags (i64 Int64, i32 Int32) ENGINE = Memory</create_query>
-    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(3000000)</fill_query>
+    <fill_query>INSERT INTO test_join_used_flags SELECT number AS i64, rand32() AS i32 FROM numbers_mt(1500000)</fill_query>
     <query>SELECT l.i64, r.i64, l.i32, r.i32 FROM test_join_used_flags l RIGHT JOIN test_join_used_flags r USING i64 format Null</query>
     <drop_query>DROP TABLE IF EXISTS test_join_used_flags</drop_query>
 </test>

From b5997e6a9639f54698cf1dda354625a5f20bb776 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 15:06:38 +0300
Subject: [PATCH 080/105] MergeTreePrefetchedReadPool disable for LIMIT only
 queries

---
 src/Interpreters/InterpreterSelectQuery.cpp    | 7 ++++++-
 src/Planner/PlannerJoinTree.cpp                | 7 ++++++-
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 8 +++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index cdf1b4228bc..d3d7470ad25 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2501,7 +2501,12 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
             max_block_size = std::max<UInt64>(1, max_block_limited);
             max_threads_execute_query = max_streams = 1;
         }
-        if (max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        if (local_limits.local_limits.size_limits.max_rows != 0 &&
+            max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        {
+            query_info.limit = max_block_limited;
+        }
+        else
         {
             query_info.limit = max_block_limited;
         }
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index e2cdf146a69..095db09ffbd 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -645,7 +645,12 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     max_threads_execute_query = 1;
                 }
 
-                if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0 &&
+                    max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                {
+                    table_expression_query_info.limit = max_block_size_limited;
+                }
+                else
                 {
                     table_expression_query_info.limit = max_block_size_limited;
                 }
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index aa1c463e4e6..bdb2f7ea009 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -418,7 +418,13 @@ Pipe ReadFromMergeTree::readFromPool(
         && settings.allow_prefetched_read_pool_for_local_filesystem
         && MergeTreePrefetchedReadPool::checkReadMethodAllowed(reader_settings.read_settings.local_fs_method);
 
-    if (allow_prefetched_remote || allow_prefetched_local)
+    /** Do not use the prefetched read pool if the query is a trivial LIMIT query
+      * (query_info.limit is set, i.e. non-zero), because the time spent filling per-thread
+      * tasks can be greater than the whole query execution for big tables with a small limit.
+      */
+    bool use_prefetched_read_pool = query_info.limit == 0 && (allow_prefetched_remote || allow_prefetched_local);
+
+    if (use_prefetched_read_pool)
     {
         pool = std::make_shared<MergeTreePrefetchedReadPool>(
             std::move(parts_with_range),

From 8573c66b09d3879d65069d5b50713ad0714238b5 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 15:29:25 +0300
Subject: [PATCH 081/105] Fixed code review issues

---
 src/Interpreters/InterpreterSelectQuery.cpp | 6 +++---
 src/Planner/PlannerJoinTree.cpp             | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index d3d7470ad25..b6c9b8cdba3 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2501,10 +2501,10 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
             max_block_size = std::max<UInt64>(1, max_block_limited);
             max_threads_execute_query = max_streams = 1;
         }
-        if (local_limits.local_limits.size_limits.max_rows != 0 &&
-            max_block_limited < local_limits.local_limits.size_limits.max_rows)
+        if (local_limits.local_limits.size_limits.max_rows != 0)
         {
-            query_info.limit = max_block_limited;
+            if (max_block_limited < local_limits.local_limits.size_limits.max_rows)
+                query_info.limit = max_block_limited;
         }
         else
         {
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 095db09ffbd..857fb993600 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -645,10 +645,10 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     max_threads_execute_query = 1;
                 }
 
-                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0 &&
-                    max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                if (select_query_info.local_storage_limits.local_limits.size_limits.max_rows != 0)
                 {
-                    table_expression_query_info.limit = max_block_size_limited;
+                    if (max_block_size_limited < select_query_info.local_storage_limits.local_limits.size_limits.max_rows)
+                        table_expression_query_info.limit = max_block_size_limited;
                 }
                 else
                 {

From 74fb390444baec49360f5e07a34b32f63684218c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 4 Jan 2024 13:36:42 +0100
Subject: [PATCH 082/105] fix build

---
 src/Interpreters/DDLTask.cpp | 2 +-
 src/Interpreters/DDLTask.h   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp
index 85bf6fec655..d418be51cc5 100644
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@@ -232,7 +232,7 @@ bool DDLTask::findCurrentHostID(ContextPtr global_context, Poco::Logger * log, c
             throw Exception(
                 ErrorCodes::DNS_ERROR,
                 "{} is not a local address. Check parameter 'host_name' in the configuration",
-                *config_host_name)
+                *config_host_name);
     }
 
     for (const HostID & host : entry.hosts)
diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h
index e1a81ac97af..bc45b46bf0f 100644
--- a/src/Interpreters/DDLTask.h
+++ b/src/Interpreters/DDLTask.h
@@ -44,6 +44,9 @@ struct HostID
     explicit HostID(const Cluster::Address & address)
         : host_name(address.host_name), port(address.port) {}
 
+    HostID(const String & host_name_, UInt16 port_)
+        : host_name(host_name_), port(port_) {}
+
     static HostID fromString(const String & host_port_str);
 
     String toString() const

From 82d3d570530ebd014717ba0e11bfd975fe2502e7 Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Thu, 4 Jan 2024 12:45:17 +0000
Subject: [PATCH 083/105] Sync content of the docker test images

---
 docker/test/stateless/stress_tests.lib | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib
index 8f89c1b80dd..6f0dabb5207 100644
--- a/docker/test/stateless/stress_tests.lib
+++ b/docker/test/stateless/stress_tests.lib
@@ -236,6 +236,10 @@ function check_logs_for_critical_errors()
         && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
         || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
 
+    rg -Fa "it is lost forever" /var/log/clickhouse-server/clickhouse-server*.log | grep 'SharedMergeTreePartCheckThread' > /dev/null \
+        && echo -e "Lost forever for SharedMergeTree$FAIL" >> /test_output/test_results.tsv \
+        || echo -e "No SharedMergeTree lost forever in clickhouse-server.log$OK" >> /test_output/test_results.tsv
+
     # Remove file no_such_key_errors.txt if it's empty
     [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
 

From 5bfddfebb6ac1f50ebbdca5d0e146f72fe085793 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= <git@rmr.ninja>
Date: Thu, 4 Jan 2024 14:08:58 +0000
Subject: [PATCH 084/105] Fix instantiation detection

---
 src/AggregateFunctions/AggregateFunctionMax.cpp     | 5 +++--
 src/AggregateFunctions/AggregateFunctionMin.cpp     | 5 +++--
 src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 2 ++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp
index 2577c932592..e9cd651b8db 100644
--- a/src/AggregateFunctions/AggregateFunctionMax.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMax.cpp
@@ -1,6 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
+#include <Common/Concepts.h>
 #include <Common/findExtreme.h>
 
 namespace DB
@@ -74,7 +75,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
@@ -169,7 +170,7 @@ void AggregateFunctionsSingleValueMax<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp
index 701101e7207..d767bd5c563 100644
--- a/src/AggregateFunctions/AggregateFunctionMin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMin.cpp
@@ -1,6 +1,7 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
 #include <AggregateFunctions/HelpersMinMaxAny.h>
+#include <Common/Concepts.h>
 #include <Common/findExtreme.h>
 
 
@@ -75,7 +76,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlace(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
@@ -170,7 +171,7 @@ void AggregateFunctionsSingleValueMin<Data>::addBatchSinglePlaceNotNull(
     Arena * arena,
     ssize_t if_argument_pos) const
 {
-    if constexpr (!std::is_same_v<Data, SingleValueDataString> || !std::is_same_v<Data, SingleValueDataGeneric>)
+    if constexpr (!is_any_of<typename Data::Impl, SingleValueDataString, SingleValueDataGeneric>)
     {
         /// Leave other numeric types (large integers, decimals, etc) to keep doing the comparison as it's
         /// faster than doing a permutation
diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
index b69a0b100a3..dec70861543 100644
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@@ -965,6 +965,7 @@ template <typename Data>
 struct AggregateFunctionMinData : Data
 {
     using Self = AggregateFunctionMinData;
+    using Impl = Data;
 
     bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena)     { return this->changeIfLess(column, row_num, arena); }
     bool changeIfBetter(const Self & to, Arena * arena)                            { return this->changeIfLess(to, arena); }
@@ -993,6 +994,7 @@ template <typename Data>
 struct AggregateFunctionMaxData : Data
 {
     using Self = AggregateFunctionMaxData;
+    using Impl = Data;
 
     bool changeIfBetter(const IColumn & column, size_t row_num, Arena * arena)     { return this->changeIfGreater(column, row_num, arena); }
     bool changeIfBetter(const Self & to, Arena * arena)                            { return this->changeIfGreater(to, arena); }

From d9f68f4a2c4e3fcdce8776af5d9ee2cf7a551f15 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 4 Jan 2024 17:16:47 +0300
Subject: [PATCH 085/105] Fixed tests

---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index bdb2f7ea009..6f0429459cd 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -422,7 +422,7 @@ Pipe ReadFromMergeTree::readFromPool(
       * Because time spend during filling per thread tasks can be greater than whole query
       * execution for big tables with small limit.
       */
-    bool use_prefetched_read_pool = query_info.limit != 0 && (allow_prefetched_remote || allow_prefetched_local);
+    bool use_prefetched_read_pool = query_info.limit == 0 && (allow_prefetched_remote || allow_prefetched_local);
 
     if (use_prefetched_read_pool)
     {

From 494a32f4e47af2576455cda2794ffa13568c60f3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Thu, 4 Jan 2024 14:41:04 +0000
Subject: [PATCH 086/105] Review fixes

---
 src/Storages/StorageS3.cpp | 105 ++++++++-----------------------------
 1 file changed, 23 insertions(+), 82 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index ce49be32120..d7cc86ed321 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -147,7 +147,8 @@ public:
         const Names & column_names_,
         StorageSnapshotPtr storage_snapshot_,
         StorageS3 & storage_,
-        SelectQueryInfo query_info_,
+        ReadFromFormatInfo read_from_format_info_,
+        bool need_only_count_,
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
@@ -155,7 +156,8 @@ public:
         , column_names(column_names_)
         , storage_snapshot(std::move(storage_snapshot_))
         , storage(storage_)
-        , query_info(std::move(query_info_))
+        , read_from_format_info(std::move(read_from_format_info_))
+        , need_only_count(need_only_count_)
         , local_context(std::move(context_))
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -168,7 +170,8 @@ private:
     Names column_names;
     StorageSnapshotPtr storage_snapshot;
     StorageS3 & storage;
-    SelectQueryInfo query_info;
+    ReadFromFormatInfo read_from_format_info;
+    bool need_only_count;
     StorageS3::Configuration query_configuration;
     NamesAndTypesList virtual_columns;
 
@@ -183,77 +186,6 @@ private:
 };
 
 
-static Block getBlockWithVirtuals(const NamesAndTypesList & virtual_columns, const String & bucket, const std::unordered_set<String> & keys)
-{
-    Block virtual_columns_block;
-    fs::path bucket_path(bucket);
-
-    for (const auto & [column_name, column_type] : virtual_columns)
-    {
-        if (column_name == "_path")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-                column->insert((bucket_path / key).string());
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else if (column_name == "_file")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-            {
-                auto pos = key.find_last_of('/');
-                if (pos != std::string::npos)
-                    column->insert(key.substr(pos + 1));
-                else
-                    column->insert(key);
-            }
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else if (column_name == "_key")
-        {
-            auto column = column_type->createColumn();
-            for (const auto & key : keys)
-                column->insert(key);
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-        else
-        {
-            auto column = column_type->createColumn();
-            column->insertManyDefaults(keys.size());
-            virtual_columns_block.insert({std::move(column), column_type, column_name});
-        }
-    }
-
-    /// Column _key is mandatory and may not be in virtual_columns list
-    if (!virtual_columns_block.has("_key"))
-    {
-        auto column_type = std::make_shared<DataTypeString>();
-        auto column = column_type->createColumn(); for (const auto & key : keys)
-            column->insert(key);
-        virtual_columns_block.insert({std::move(column), column_type, "_key"});
-    }
-
-    return virtual_columns_block;
-}
-
-static std::vector<String> filterKeysForPartitionPruning(
-    const std::vector<String> & keys,
-    const String & bucket,
-    const NamesAndTypesList & virtual_columns,
-    const ActionsDAG::Node * predicate,
-    ContextPtr context)
-{
-    std::unordered_set<String> result_keys(keys.begin(), keys.end());
-
-    auto block = getBlockWithVirtuals(virtual_columns, bucket, result_keys);
-    VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context);
-    result_keys = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_key");
-
-    LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Applied partition pruning {} from {} keys left", result_keys.size(), keys.size());
-    return std::vector<String>(result_keys.begin(), result_keys.end());
-}
-
 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
 
@@ -305,9 +237,9 @@ public:
                 "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error());
 
         recursive = globbed_uri.key == "/**" ? true : false;
-        fillInternalBufferAssumeLocked();
 
         filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        fillInternalBufferAssumeLocked();
     }
 
     KeyWithInfoPtr next()
@@ -1161,7 +1093,17 @@ static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
     }
     else
     {
-        Strings keys = filterKeysForPartitionPruning(configuration.keys, configuration.url.bucket, virtual_columns, predicate, local_context);
+        Strings keys = configuration.keys;
+        auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns);
+        if (filter_dag)
+        {
+            std::vector<String> paths;
+            paths.reserve(keys.size());
+            for (const auto & key : keys)
+                paths.push_back(fs::path(configuration.url.bucket) / key);
+            VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context);
+        }
+
         return std::make_shared<StorageS3Source::KeysIterator>(
             *configuration.client, configuration.url.version_id, keys,
             configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback);
@@ -1195,12 +1137,16 @@ void StorageS3::read(
 {
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns);
 
+    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
+        && local_context->getSettingsRef().optimize_count_from_files;
+
     auto reading = std::make_unique<ReadFromStorageS3Step>(
         read_from_format_info.source_header,
         column_names,
         storage_snapshot,
         *this,
-        query_info,
+        std::move(read_from_format_info),
+        need_only_count,
         local_context,
         max_block_size,
         num_streams);
@@ -1235,8 +1181,6 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
 
     createIterator(nullptr);
 
-    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, storage.supportsSubsetOfColumns(local_context), virtual_columns);
-
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     if (estimated_keys_count > 1)
         num_streams = std::min(num_streams, estimated_keys_count);
@@ -1244,9 +1188,6 @@ void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline,
         /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case.
         num_streams = 1;
 
-    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
-        && local_context->getSettingsRef().optimize_count_from_files;
-
     const size_t max_threads = local_context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul));
     LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads);

From 39b15f91303483ca3f9f5efcaab6cea6236b7d46 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <mrnovikd@gmail.com>
Date: Thu, 4 Jan 2024 16:33:52 +0100
Subject: [PATCH 087/105] Add a comment

---
 src/Planner/CollectTableExpressionData.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp
index 38c986fd31f..78a7c7074c3 100644
--- a/src/Planner/CollectTableExpressionData.cpp
+++ b/src/Planner/CollectTableExpressionData.cpp
@@ -46,9 +46,12 @@ public:
             for (auto & using_element : using_list)
             {
                 auto & column_node = using_element->as<ColumnNode&>();
+                /// This list contains column nodes from left and right tables.
                 auto & columns_from_subtrees = column_node.getExpressionOrThrow()->as<ListNode&>().getNodes();
 
+                /// Visit left table column node.
                 visitUsingColumn(columns_from_subtrees[0]);
+                /// Visit right table column node.
                 visitUsingColumn(columns_from_subtrees[1]);
             }
             return;

From 296e1ac8aa000996f004343aa299e5b732c7c8df Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:11:39 +0000
Subject: [PATCH 088/105] FunctionSqid.cpp --> sqid.cpp

---
 src/Functions/{FunctionSqid.cpp => sqid.cpp} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/Functions/{FunctionSqid.cpp => sqid.cpp} (100%)

diff --git a/src/Functions/FunctionSqid.cpp b/src/Functions/sqid.cpp
similarity index 100%
rename from src/Functions/FunctionSqid.cpp
rename to src/Functions/sqid.cpp

From 03e344c36ae27b62cfcf058640b81b8ae8460afe Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:15:06 +0000
Subject: [PATCH 089/105] Fix preprocessor guard

---
 src/Functions/sqid.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index 546263914c2..4517bba963e 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -1,6 +1,6 @@
 #include "config.h"
 
-#ifdef ENABLE_SQIDS
+#if USE_SQIDS
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnsNumber.h>

From 98d602c3d5e3e197ed9d3579ad34155b386acb74 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:21:08 +0000
Subject: [PATCH 090/105] Reserve enough space in result column upfront

---
 src/Functions/sqid.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index 4517bba963e..abd9d22f4c5 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -57,9 +57,10 @@ public:
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {
-        size_t num_args = arguments.size();
         auto col_res = ColumnString::create();
+        col_res->reserve(input_rows_count);
 
+        const size_t num_args = arguments.size();
         std::vector<UInt64> numbers(num_args);
         for (size_t i = 0; i < input_rows_count; ++i)
         {

From 52058211e7ff227feb9c890f641d2299af9a246c Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Thu, 4 Jan 2024 08:21:46 -0800
Subject: [PATCH 091/105] Fix some thread pool settings not updating at runtime
 (#58485)

---
 programs/server/Server.cpp | 94 +++++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 926e57070f3..1fa3d1cfa73 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1260,11 +1260,11 @@ try
         {
             Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
 
-            ServerSettings server_settings_;
-            server_settings_.loadSettingsFromConfig(*config);
+            ServerSettings new_server_settings;
+            new_server_settings.loadSettingsFromConfig(*config);
 
-            size_t max_server_memory_usage = server_settings_.max_server_memory_usage;
-            double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio;
+            size_t max_server_memory_usage = new_server_settings.max_server_memory_usage;
+            double max_server_memory_usage_to_ram_ratio = new_server_settings.max_server_memory_usage_to_ram_ratio;
 
             size_t current_physical_server_memory = getMemoryAmount(); /// With cgroups, the amount of memory available to the server can be changed dynamically.
             size_t default_max_server_memory_usage = static_cast<size_t>(current_physical_server_memory * max_server_memory_usage_to_ram_ratio);
@@ -1294,9 +1294,9 @@ try
             total_memory_tracker.setDescription("(total)");
             total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
 
-            size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit;
+            size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit;
 
-            size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+            size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(current_physical_server_memory * new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             if (merges_mutations_memory_usage_soft_limit == 0)
             {
                 merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
@@ -1304,7 +1304,7 @@ try
                     " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
                     formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
                     formatReadableSizeWithBinarySuffix(current_physical_server_memory),
-                    server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+                    new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             }
             else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
             {
@@ -1313,7 +1313,7 @@ try
                     " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
                     formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
                     formatReadableSizeWithBinarySuffix(current_physical_server_memory),
-                    server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+                    new_server_settings.merges_mutations_memory_usage_to_ram_ratio);
             }
 
             LOG_INFO(log, "Merges and mutations memory limit is set to {}",
@@ -1322,7 +1322,7 @@ try
             background_memory_tracker.setDescription("(background)");
             background_memory_tracker.setMetric(CurrentMetrics::MergesMutationsMemoryTracking);
 
-            total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory);
+            total_memory_tracker.setAllowUseJemallocMemory(new_server_settings.allow_use_jemalloc_memory);
 
             auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
             total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
@@ -1346,26 +1346,26 @@ try
             global_context->setRemoteHostFilter(*config);
             global_context->setHTTPHeaderFilter(*config);
 
-            global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
-            global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
-            global_context->setMaxTableNumToWarn(server_settings_.max_table_num_to_warn);
-            global_context->setMaxDatabaseNumToWarn(server_settings_.max_database_num_to_warn);
-            global_context->setMaxPartNumToWarn(server_settings_.max_part_num_to_warn);
+            global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop);
+            global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop);
+            global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn);
+            global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn);
+            global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn);
 
             ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
-            if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
-                concurrent_threads_soft_limit = server_settings_.concurrent_threads_soft_limit_num;
-            if (server_settings_.concurrent_threads_soft_limit_ratio_to_cores > 0)
+            if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
+                concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
+            if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
             {
-                auto value = server_settings_.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
+                auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
                 if (value > 0 && value < concurrent_threads_soft_limit)
                     concurrent_threads_soft_limit = value;
             }
             ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
 
-            global_context->getProcessList().setMaxSize(server_settings_.max_concurrent_queries);
-            global_context->getProcessList().setMaxInsertQueriesAmount(server_settings_.max_concurrent_insert_queries);
-            global_context->getProcessList().setMaxSelectQueriesAmount(server_settings_.max_concurrent_select_queries);
+            global_context->getProcessList().setMaxSize(new_server_settings.max_concurrent_queries);
+            global_context->getProcessList().setMaxInsertQueriesAmount(new_server_settings.max_concurrent_insert_queries);
+            global_context->getProcessList().setMaxSelectQueriesAmount(new_server_settings.max_concurrent_select_queries);
 
             if (config->has("keeper_server"))
                 global_context->updateKeeperConfiguration(*config);
@@ -1376,68 +1376,68 @@ try
             /// This is done for backward compatibility.
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_pool_size;
-                auto new_ratio = server_settings_.background_merges_mutations_concurrency_ratio;
+                auto new_pool_size = new_server_settings.background_pool_size;
+                auto new_ratio = new_server_settings.background_merges_mutations_concurrency_ratio;
                 global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, static_cast<size_t>(new_pool_size * new_ratio));
-                global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings_.background_merges_mutations_scheduling_policy.toString());
+                global_context->getMergeMutateExecutor()->updateSchedulingPolicy(new_server_settings.background_merges_mutations_scheduling_policy.toString());
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_move_pool_size;
+                auto new_pool_size = new_server_settings.background_move_pool_size;
                 global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_fetches_pool_size;
+                auto new_pool_size = new_server_settings.background_fetches_pool_size;
                 global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
             if (global_context->areBackgroundExecutorsInitialized())
             {
-                auto new_pool_size = server_settings_.background_common_pool_size;
+                auto new_pool_size = new_server_settings.background_common_pool_size;
                 global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
             }
 
-            global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings_.background_buffer_flush_schedule_pool_size);
-            global_context->getSchedulePool().increaseThreadsCount(server_settings_.background_schedule_pool_size);
-            global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size);
-            global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size);
+            global_context->getBufferFlushSchedulePool().increaseThreadsCount(new_server_settings.background_buffer_flush_schedule_pool_size);
+            global_context->getSchedulePool().increaseThreadsCount(new_server_settings.background_schedule_pool_size);
+            global_context->getMessageBrokerSchedulePool().increaseThreadsCount(new_server_settings.background_message_broker_schedule_pool_size);
+            global_context->getDistributedSchedulePool().increaseThreadsCount(new_server_settings.background_distributed_schedule_pool_size);
 
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, server_settings_.tables_loader_foreground_pool_size);
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, server_settings_.tables_loader_background_pool_size);
-            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, server_settings_.tables_loader_background_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, new_server_settings.tables_loader_foreground_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, new_server_settings.tables_loader_background_pool_size);
+            global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, new_server_settings.tables_loader_background_pool_size);
 
             getIOThreadPool().reloadConfiguration(
-                server_settings.max_io_thread_pool_size,
-                server_settings.max_io_thread_pool_free_size,
-                server_settings.io_thread_pool_queue_size);
+                new_server_settings.max_io_thread_pool_size,
+                new_server_settings.max_io_thread_pool_free_size,
+                new_server_settings.io_thread_pool_queue_size);
 
             getBackupsIOThreadPool().reloadConfiguration(
-                server_settings.max_backups_io_thread_pool_size,
-                server_settings.max_backups_io_thread_pool_free_size,
-                server_settings.backups_io_thread_pool_queue_size);
+                new_server_settings.max_backups_io_thread_pool_size,
+                new_server_settings.max_backups_io_thread_pool_free_size,
+                new_server_settings.backups_io_thread_pool_queue_size);
 
             getActivePartsLoadingThreadPool().reloadConfiguration(
-                server_settings.max_active_parts_loading_thread_pool_size,
+                new_server_settings.max_active_parts_loading_thread_pool_size,
                 0, // We don't need any threads once all the parts will be loaded
-                server_settings.max_active_parts_loading_thread_pool_size);
+                new_server_settings.max_active_parts_loading_thread_pool_size);
 
             getOutdatedPartsLoadingThreadPool().reloadConfiguration(
-                server_settings.max_outdated_parts_loading_thread_pool_size,
+                new_server_settings.max_outdated_parts_loading_thread_pool_size,
                 0, // We don't need any threads once all the parts will be loaded
-                server_settings.max_outdated_parts_loading_thread_pool_size);
+                new_server_settings.max_outdated_parts_loading_thread_pool_size);
 
             /// It could grow if we need to synchronously wait until all the data parts will be loaded.
             getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(
-                server_settings.max_active_parts_loading_thread_pool_size
+                new_server_settings.max_active_parts_loading_thread_pool_size
             );
 
             getPartsCleaningThreadPool().reloadConfiguration(
-                server_settings.max_parts_cleaning_thread_pool_size,
+                new_server_settings.max_parts_cleaning_thread_pool_size,
                 0, // We don't need any threads one all the parts will be deleted
-                server_settings.max_parts_cleaning_thread_pool_size);
+                new_server_settings.max_parts_cleaning_thread_pool_size);
 
             if (config->has("resources"))
             {

From 76b7cddb186ba6d44e581fa35dce9fd48fc6b3ed Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 16:29:43 +0000
Subject: [PATCH 092/105] Update docs

---
 docs/en/sql-reference/functions/hash-functions.md | 4 +++-
 src/Functions/sqid.cpp                            | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index a23849c13aa..2c6a468af0e 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -1779,7 +1779,9 @@ Result:
 
 ## sqid
 
-Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/).
+Transforms numbers into a [Sqid](https://sqids.org/) which is a YouTube-like ID string.
+The output alphabet is `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`.
+Do not use this function for hashing - the generated IDs can be decoded back into numbers.
 
 **Syntax**
 
diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp
index abd9d22f4c5..363a3f8ac13 100644
--- a/src/Functions/sqid.cpp
+++ b/src/Functions/sqid.cpp
@@ -84,7 +84,7 @@ REGISTER_FUNCTION(Sqid)
 {
     factory.registerFunction<FunctionSqid>(FunctionDocumentation{
         .description=R"(
-Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/).)",
+Transforms numbers into a [Sqid](https://sqids.org/) which is a Youtube-like ID string.)",
         .syntax="sqid(number1, ...)",
         .arguments={{"number1, ...", "Arbitrarily many UInt8, UInt16, UInt32 or UInt64 arguments"}},
         .returned_value="A hash id [String](/docs/en/sql-reference/data-types/string.md).",

From 9f5015737bcb9fdb3a0d0d8056ca05dfc0c1302a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Wed, 3 Jan 2024 23:46:13 +0100
Subject: [PATCH 093/105] fix a stupid case of intersecting parts

---
 src/Storages/MergeTree/MergeTreeData.cpp         | 14 +++++++++-----
 ...02486_truncate_and_unexpected_parts.reference |  2 ++
 .../02486_truncate_and_unexpected_parts.sql      | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 1c80778f1ca..a23d59055ca 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -3985,8 +3985,15 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW
     /// FIXME refactor removePartsFromWorkingSet(...), do not remove parts twice
     removePartsFromWorkingSet(txn, parts_to_remove, clear_without_timeout, lock);
 
+    /// We can only create a covering part for a blocks range that starts with 0 (otherwise we may get "intersecting parts"
+    /// if we remove a range from the middle when dropping a part).
+    /// Maybe we could do it by incrementing mutation version to get a name for the empty covering part,
+    /// but it's okay to simply avoid creating it for DROP PART (for a part in the middle).
+    /// NOTE: Block numbers in ReplicatedMergeTree start from 0. For MergeTree, is_new_syntax is always false.
+    assert(!create_empty_part || supportsReplication());
+    bool range_in_the_middle = drop_range.min_block;
     bool is_new_syntax = format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;
-    if (create_empty_part && !parts_to_remove.empty() && is_new_syntax)
+    if (create_empty_part && !parts_to_remove.empty() && is_new_syntax && !range_in_the_middle)
     {
         /// We are going to remove a lot of parts from zookeeper just after returning from this function.
         /// And we will remove parts from disk later (because some queries may use them).
@@ -3995,12 +4002,9 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW
         /// We don't need to commit it to zk, and don't even need to activate it.
 
         MergeTreePartInfo empty_info = drop_range;
-        empty_info.level = empty_info.mutation = 0;
-        if (!empty_info.min_block)
-            empty_info.min_block = MergeTreePartInfo::MAX_BLOCK_NUMBER;
+        empty_info.min_block = empty_info.level = empty_info.mutation = 0;
         for (const auto & part : parts_to_remove)
         {
-            empty_info.min_block = std::min(empty_info.min_block, part->info.min_block);
             empty_info.level = std::max(empty_info.level, part->info.level);
             empty_info.mutation = std::max(empty_info.mutation, part->info.mutation);
         }
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
index 2ece1147d78..824d4bbec98 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.reference
@@ -13,3 +13,5 @@
 5	rmt2
 7	rmt2
 9	rmt2
+1
+3
diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
index 52e8be236c8..755cba2a155 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
@@ -50,3 +50,19 @@ system sync replica rmt1;
 system sync replica rmt2;
 
 select *, _table from merge(currentDatabase(), '') order by _table, (*,);
+
+
+create table rmt3 (n int) engine=ReplicatedMergeTree('/test/02468/{database}3', '1') order by tuple() settings replicated_max_ratio_of_wrong_parts=0, max_suspicious_broken_parts=0, max_suspicious_broken_parts_bytes=0;
+set insert_keeper_fault_injection_probability=0;
+insert into rmt3 values (1);
+insert into rmt3 values (2);
+insert into rmt3 values (3);
+
+system stop cleanup rmt3;
+alter table rmt3 drop part 'all_1_1_0';
+optimize table rmt3 final;
+
+detach table rmt3 sync;
+attach table rmt3;
+
+select * from rmt3 order by n;

From 9149072520f979c7744f1c5222950f83de8365ff Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 4 Jan 2024 17:27:26 +0000
Subject: [PATCH 094/105] Update tests

---
 ...f_indexes_support_match_function.reference | 12 +++
 ...ngrambf_indexes_support_match_function.sql | 98 ++++++++++++++++---
 2 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
index 5c6a213a03f..1cf1644fe0a 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.reference
@@ -2,8 +2,12 @@
 2	Hello World
 1	Hello ClickHouse
 2	Hello World
+          Granules: 6/6
+          Granules: 2/6
             Granules: 6/6
             Granules: 2/6
+          Granules: 6/6
+          Granules: 2/6
             Granules: 6/6
             Granules: 2/6
 ---
@@ -13,14 +17,22 @@
 1	Hello ClickHouse
 2	Hello World
 6	World Champion
+          Granules: 6/6
+          Granules: 3/6
             Granules: 6/6
             Granules: 3/6
+          Granules: 6/6
+          Granules: 3/6
             Granules: 6/6
             Granules: 3/6
 ---
 5	OLAP Database
 5	OLAP Database
+          Granules: 6/6
+          Granules: 1/6
             Granules: 6/6
             Granules: 1/6
+          Granules: 6/6
+          Granules: 1/6
             Granules: 6/6
             Granules: 1/6
diff --git a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
index df39be8abd6..49d39c601ef 100644
--- a/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
+++ b/tests/queries/0_stateless/02943_tokenbf_and_ngrambf_indexes_support_match_function.sql
@@ -1,4 +1,3 @@
-SET allow_experimental_analyzer = 1;
 DROP TABLE IF EXISTS tokenbf_tab;
 DROP TABLE IF EXISTS ngrambf_tab;
 
@@ -28,7 +27,7 @@ INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3,
 SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
 
--- Skip 2/6 granules
+-- Read 2/6 granules
 -- Required string: 'Hello '
 -- Alternatives: 'Hello ClickHouse', 'Hello World'
 
@@ -39,7 +38,20 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -48,14 +60,28 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes=1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
+
 
 SELECT '---';
 
 SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
 
--- Skip 3/6 granules
+-- Read 3/6 granules
 -- Required string: -
 -- Alternatives: 'ClickHouse', 'World'
 
@@ -66,7 +92,20 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -75,18 +114,30 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT '---';
 
 SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
 
--- Skip 5/6 granules
+-- Read 1/6 granules
 -- Required string: 'OLAP'
 -- Alternatives: -
 
-set allow_experimental_analyzer = 1;
 SELECT *
 FROM
 (
@@ -94,7 +145,19 @@ FROM
     SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 SELECT *
 FROM
@@ -103,7 +166,20 @@ FROM
     SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
 )
 WHERE
-  explain LIKE '%Granules: %';
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 0;
+
+SELECT *
+FROM
+(
+    EXPLAIN PLAN indexes = 1
+    SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
+)
+WHERE
+    explain LIKE '%Granules: %'
+SETTINGS
+  allow_experimental_analyzer = 1;
 
 DROP TABLE tokenbf_tab;
 DROP TABLE ngrambf_tab;

From 491df7bf6e7d8321d2694d76c522f1520871326d Mon Sep 17 00:00:00 2001
From: Jihyuk Bok <jihyuk.bok@clickhouse.com>
Date: Thu, 4 Jan 2024 18:46:55 +0100
Subject: [PATCH 095/105] enable ordinary databases while restoration

---
 src/Backups/RestorerFromBackup.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index 4e580e493a7..a33773f19ab 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -573,11 +573,12 @@ void RestorerFromBackup::createDatabase(const String & database_name) const
     create_database_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists);
 
     LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
-
+    auto query_context = Context::createCopy(context);
+    query_context->setSetting("allow_deprecated_database_ordinary", 1);
     try
     {
         /// Execute CREATE DATABASE query.
-        InterpreterCreateQuery interpreter{create_database_query, context};
+        InterpreterCreateQuery interpreter{create_database_query, query_context};
         interpreter.setInternal(true);
         interpreter.execute();
     }

From bc1c05e4cd4492d6e8735345000d3a50809047d1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 4 Jan 2024 20:15:52 +0100
Subject: [PATCH 096/105] Update 02486_truncate_and_unexpected_parts.sql

---
 .../queries/0_stateless/02486_truncate_and_unexpected_parts.sql  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
index 755cba2a155..5c90313b6b8 100644
--- a/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
+++ b/tests/queries/0_stateless/02486_truncate_and_unexpected_parts.sql
@@ -59,6 +59,7 @@ insert into rmt3 values (2);
 insert into rmt3 values (3);
 
 system stop cleanup rmt3;
+system sync replica rmt3 pull;
 alter table rmt3 drop part 'all_1_1_0';
 optimize table rmt3 final;
 

From 2a385bc573879a45b2ca8332d4abb91c9b862609 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Thu, 4 Jan 2024 18:26:25 +0100
Subject: [PATCH 097/105] Fix currentProfiles()

---
 src/Access/SettingsProfilesCache.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/Access/SettingsProfilesCache.cpp b/src/Access/SettingsProfilesCache.cpp
index 9f4fc5a5d89..275b3aeb6b5 100644
--- a/src/Access/SettingsProfilesCache.cpp
+++ b/src/Access/SettingsProfilesCache.cpp
@@ -140,7 +140,6 @@ void SettingsProfilesCache::mergeSettingsAndConstraintsFor(EnabledSettings & ena
 
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
-    info->profiles = merged_settings.toProfileIDs();
     substituteProfiles(merged_settings, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
 
     info->settings = merged_settings.toSettingsChanges();
@@ -156,6 +155,8 @@ void SettingsProfilesCache::substituteProfiles(
     std::vector<UUID> & substituted_profiles,
     std::unordered_map<UUID, String> & names_of_substituted_profiles) const
 {
+    profiles = elements.toProfileIDs();
+
     /// We should substitute profiles in reversive order because the same profile can occur
     /// in `elements` multiple times (with some other settings in between) and in this case
     /// the last occurrence should override all the previous ones.
@@ -231,12 +232,12 @@ std::shared_ptr<const SettingsProfilesInfo> SettingsProfilesCache::getSettingsPr
     if (auto pos = this->profile_infos_cache.get(profile_id))
         return *pos;
 
-    SettingsProfileElements elements = all_profiles[profile_id]->elements;
+    SettingsProfileElements elements;
+    auto & element = elements.emplace_back();
+    element.parent_profile = profile_id;
 
     auto info = std::make_shared<SettingsProfilesInfo>(access_control);
 
-    info->profiles.push_back(profile_id);
-    info->profiles_with_implicit.push_back(profile_id);
     substituteProfiles(elements, info->profiles, info->profiles_with_implicit, info->names_of_profiles);
     info->settings = elements.toSettingsChanges();
     info->constraints.merge(elements.toSettingsConstraints(access_control));

From 9c465965566019c1184a07579be7764c049ea91f Mon Sep 17 00:00:00 2001
From: Mathieu Rey <matrey@gmail.com>
Date: Fri, 5 Jan 2024 14:07:51 +0800
Subject: [PATCH 098/105] Fix example 3 and tweak formatting

* example 3 is about having several SQL queries in the same input, made an example illustrating that
* removed the sql marker for all results except example 2 to emphasize when you would get colorized output in the terminal
---
 .../operations/utilities/clickhouse-format.md | 28 +++++++++----------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md
index 101310cc65e..3e4295598aa 100644
--- a/docs/en/operations/utilities/clickhouse-format.md
+++ b/docs/en/operations/utilities/clickhouse-format.md
@@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
 
 Result:
 
-```sql
+```bash
 SELECT number
 FROM numbers(10)
 WHERE number % 2
@@ -49,22 +49,20 @@ SELECT sum(number) FROM numbers(5)
 3. Multiqueries:
 
 ```bash
-$ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNION DISTINCT SELECT 3);"
+$ clickhouse-format -n <<< "SELECT min(number) FROM numbers(5); SELECT max(number) FROM numbers(5);"
 ```
 
 Result:
 
-```sql
-SELECT *
-FROM
-(
-    SELECT 1 AS x
-    UNION ALL
-    SELECT 1
-    UNION DISTINCT
-    SELECT 3
-)
+```
+SELECT min(number)
+FROM numbers(5)
 ;
+
+SELECT max(number)
+FROM numbers(5)
+;
+
 ```
 
 4. Obfuscating:
@@ -75,7 +73,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
 
 Result:
 
-```sql
+```
 SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
 ```
 
@@ -87,7 +85,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
 
 Result:
 
-```sql
+```
 SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
 ```
 
@@ -99,7 +97,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
 
 Result:
 
-```sql
+```
 SELECT * \
 FROM  \
 ( \

From 7d2dafb02415a387c9fe7bb191e669ac29230f8f Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:02:42 +0000
Subject: [PATCH 099/105] Update version_date.tsv and changelogs after
 v23.12.2.59-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.12.2.59-stable.md | 32 +++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  3 +++
 5 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.12.2.59-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.12.2.59-stable.md b/docs/changelogs/v23.12.2.59-stable.md
new file mode 100644
index 00000000000..6533f4e6b86
--- /dev/null
+++ b/docs/changelogs/v23.12.2.59-stable.md
@@ -0,0 +1,32 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.12.2.59-stable (17ab210e761) FIXME as compared to v23.12.1.1368-stable (a2faa65b080)
+
+#### Backward Incompatible Change
+* Backported in [#58389](https://github.com/ClickHouse/ClickHouse/issues/58389): The MergeTree setting `clean_deleted_rows` is deprecated, it has no effect anymore. The `CLEANUP` keyword for `OPTIMIZE` is not allowed by default (unless `allow_experimental_replacing_merge_with_cleanup` is enabled). [#58316](https://github.com/ClickHouse/ClickHouse/pull/58316) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix segfault when graphite table does not have agg function [#58453](https://github.com/ClickHouse/ClickHouse/pull/58453) ([Duc Canh Le](https://github.com/canhld94)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY:  'Revert "Refreshable materialized views (takeover)"'. [#58296](https://github.com/ClickHouse/ClickHouse/pull/58296) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Fix an error in the release script - it didn't allow to make 23.12. [#58288](https://github.com/ClickHouse/ClickHouse/pull/58288) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update version_date.tsv and changelogs after v23.12.1.1368-stable [#58290](https://github.com/ClickHouse/ClickHouse/pull/58290) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix test_storage_s3_queue/test.py::test_drop_table [#58293](https://github.com/ClickHouse/ClickHouse/pull/58293) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..5296a8426b0 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10

From 9d7912fa7559cf35a6ef6bf088833718dc165c6a Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:04:23 +0000
Subject: [PATCH 100/105] Update version_date.tsv and changelogs after
 v23.11.4.24-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.11.4.24-stable.md | 26 ++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  4 ++++
 5 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.11.4.24-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.11.4.24-stable.md b/docs/changelogs/v23.11.4.24-stable.md
new file mode 100644
index 00000000000..40096285b06
--- /dev/null
+++ b/docs/changelogs/v23.11.4.24-stable.md
@@ -0,0 +1,26 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.11.4.24-stable (e79d840d7fe) FIXME as compared to v23.11.3.23-stable (a14ab450b0e)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Handle another case for preprocessing in Keeper [#58308](https://github.com/ClickHouse/ClickHouse/pull/58308) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..79a8a16314e 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08

From 5b9cc914db25a0ca89992f89c0a2b1d64102a6f1 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:11:11 +0000
Subject: [PATCH 101/105] Update version_date.tsv and changelogs after
 v23.8.9.54-lts

---
 docker/keeper/Dockerfile             |  2 +-
 docker/server/Dockerfile.alpine      |  2 +-
 docker/server/Dockerfile.ubuntu      |  2 +-
 docs/changelogs/v23.8.9.54-lts.md    | 47 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  5 +++
 5 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.8.9.54-lts.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.8.9.54-lts.md b/docs/changelogs/v23.8.9.54-lts.md
new file mode 100644
index 00000000000..00607c60c39
--- /dev/null
+++ b/docs/changelogs/v23.8.9.54-lts.md
@@ -0,0 +1,47 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.8.9.54-lts (192a1d231fa) FIXME as compared to v23.8.8.20-lts (5e012a03bf2)
+
+#### Improvement
+* Backported in [#57668](https://github.com/ClickHouse/ClickHouse/issues/57668): Output valid JSON/XML on exception during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#58491](https://github.com/ClickHouse/ClickHouse/issues/58491): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57238](https://github.com/ClickHouse/ClickHouse/issues/57238): Fetching a part waits until that part is fully committed on the remote replica. It is better not to send a part in the PreActive state. In the case of zero copy this is a mandatory restriction. [#56808](https://github.com/ClickHouse/ClickHouse/pull/56808) ([Sema Checherinda](https://github.com/CheSema)).
+* Backported in [#57655](https://github.com/ClickHouse/ClickHouse/issues/57655): Handle sigabrt case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57582](https://github.com/ClickHouse/ClickHouse/issues/57582): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)).
+* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities.  [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY:  'Update PeekableWriteBuffer.cpp'. [#57701](https://github.com/ClickHouse/ClickHouse/pull/57701) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 0b04c5f68bc99a1a3b0175689f66f51ce21c073a Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:11:15 +0000
Subject: [PATCH 102/105] Update version_date.tsv and changelogs after
 v23.10.6.60-stable

---
 docker/keeper/Dockerfile              |  2 +-
 docker/server/Dockerfile.alpine       |  2 +-
 docker/server/Dockerfile.ubuntu       |  2 +-
 docs/changelogs/v23.10.6.60-stable.md | 51 +++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv  |  5 +++
 5 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.10.6.60-stable.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.10.6.60-stable.md b/docs/changelogs/v23.10.6.60-stable.md
new file mode 100644
index 00000000000..5e1c126e729
--- /dev/null
+++ b/docs/changelogs/v23.10.6.60-stable.md
@@ -0,0 +1,51 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.10.6.60-stable (68907bbe643) FIXME as compared to v23.10.5.20-stable (e84001e5c61)
+
+#### Improvement
+* Backported in [#58493](https://github.com/ClickHouse/ClickHouse/issues/58493): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57659](https://github.com/ClickHouse/ClickHouse/issues/57659): Handle sigabrt case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57586](https://github.com/ClickHouse/ClickHouse/issues/57586): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Flatten only true Nested type if flatten_nested=1, not all Array(Tuple) [#56132](https://github.com/ClickHouse/ClickHouse/pull/56132) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix ALTER COLUMN with ALIAS [#56493](https://github.com/ClickHouse/ClickHouse/pull/56493) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* Fix `ReadonlyReplica` metric for all cases [#57267](https://github.com/ClickHouse/ClickHouse/pull/57267) ([Antonio Andelic](https://github.com/antonio2368)).
+* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)).
+* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix function jsonMergePatch for partially const columns [#57379](https://github.com/ClickHouse/ClickHouse/pull/57379) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix working with read buffers in StreamingFormatExecutor [#57438](https://github.com/ClickHouse/ClickHouse/pull/57438) ([Kruglov Pavel](https://github.com/Avogar)).
+* bugfix: correctly parse SYSTEM STOP LISTEN TCP SECURE [#57483](https://github.com/ClickHouse/ClickHouse/pull/57483) ([joelynch](https://github.com/joelynch)).
+* Ignore ON CLUSTER clause in grant/revoke queries for management of replicated access entities.  [#57538](https://github.com/ClickHouse/ClickHouse/pull/57538) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Disable system.kafka_consumers by default (due to possible live memory leak) [#57822](https://github.com/ClickHouse/ClickHouse/pull/57822) ([Azat Khuzhin](https://github.com/azat)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix parallel parsing for JSONCompactEachRow [#58250](https://github.com/ClickHouse/ClickHouse/pull/58250) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix lost blobs after dropping a replica with broken detached parts [#58333](https://github.com/ClickHouse/ClickHouse/pull/58333) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* MergeTreePrefetchedReadPool disable for LIMIT only queries [#58505](https://github.com/ClickHouse/ClickHouse/pull/58505) ([Maksim Kita](https://github.com/kitaisreal)).
+
+#### NO CL CATEGORY
+
+* Backported in [#57916](https://github.com/ClickHouse/ClickHouse/issues/57916):. [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_user_valid_until [#58409](https://github.com/ClickHouse/ClickHouse/pull/58409) ([Nikolay Degterinsky](https://github.com/evillique)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 21523820ab72c90a61b562095d6a9e2a5aa726f2 Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Fri, 5 Jan 2024 14:14:00 +0000
Subject: [PATCH 103/105] Update version_date.tsv and changelogs after
 v23.3.19.32-lts

---
 docker/keeper/Dockerfile             |  2 +-
 docker/server/Dockerfile.alpine      |  2 +-
 docker/server/Dockerfile.ubuntu      |  2 +-
 docs/changelogs/v23.3.19.32-lts.md   | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  5 ++++
 5 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelogs/v23.3.19.32-lts.md

diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 145f5d13cc2..4b5e8cd3970 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index 26d65eb3ccc..452d8539a48 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""
 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 5b96b208b11..0cefa3c14cb 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.12.1.1368"
+ARG VERSION="23.12.2.59"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 
 # set non-empty deb_location_url url to create a docker image
diff --git a/docs/changelogs/v23.3.19.32-lts.md b/docs/changelogs/v23.3.19.32-lts.md
new file mode 100644
index 00000000000..4604c986fe6
--- /dev/null
+++ b/docs/changelogs/v23.3.19.32-lts.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v23.3.19.32-lts (c4d4ca8ec02) FIXME as compared to v23.3.18.15-lts (7228475d77a)
+
+#### Backward Incompatible Change
+* Backported in [#57840](https://github.com/ClickHouse/ClickHouse/issues/57840): Remove function `arrayFold` because it has a bug. This closes [#57816](https://github.com/ClickHouse/ClickHouse/issues/57816). This closes [#57458](https://github.com/ClickHouse/ClickHouse/issues/57458). [#57836](https://github.com/ClickHouse/ClickHouse/pull/57836) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Improvement
+* Backported in [#58489](https://github.com/ClickHouse/ClickHouse/issues/58489): Fix transfer query to MySQL compatible query. Fixes [#57253](https://github.com/ClickHouse/ClickHouse/issues/57253). Fixes [#52654](https://github.com/ClickHouse/ClickHouse/issues/52654). Fixes [#56729](https://github.com/ClickHouse/ClickHouse/issues/56729). [#56456](https://github.com/ClickHouse/ClickHouse/pull/56456) ([flynn](https://github.com/ucasfl)).
+* Backported in [#57653](https://github.com/ClickHouse/ClickHouse/issues/57653): Handle sigabrt case when getting PostgreSQL table structure with empty array. [#57618](https://github.com/ClickHouse/ClickHouse/pull/57618) ([Mike Kot (Михаил Кот)](https://github.com/myrrc)).
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#57580](https://github.com/ClickHouse/ClickHouse/issues/57580): Fix issue caught in https://github.com/docker-library/official-images/pull/15846. [#57571](https://github.com/ClickHouse/ClickHouse/pull/57571) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Prevent incompatible ALTER of projection columns [#56948](https://github.com/ClickHouse/ClickHouse/pull/56948) ([Amos Bird](https://github.com/amosbird)).
+* Fix segfault after ALTER UPDATE with Nullable MATERIALIZED column [#57147](https://github.com/ClickHouse/ClickHouse/pull/57147) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix incorrect JOIN plan optimization with partially materialized normal projection [#57196](https://github.com/ClickHouse/ClickHouse/pull/57196) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
+* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix invalid preprocessing on Keeper [#58069](https://github.com/ClickHouse/ClickHouse/pull/58069) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix Integer overflow in Poco::UTF32Encoding [#58073](https://github.com/ClickHouse/ClickHouse/pull/58073) ([Andrey Fedotov](https://github.com/anfedotoff)).
+* Remove parallel parsing for JSONCompactEachRow [#58181](https://github.com/ClickHouse/ClickHouse/pull/58181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Pin alpine version of integration tests helper container [#57669](https://github.com/ClickHouse/ClickHouse/pull/57669) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 53ad807c44b..b2983033e44 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,7 +1,10 @@
+v23.12.2.59-stable	2024-01-05
 v23.12.1.1368-stable	2023-12-28
+v23.11.4.24-stable	2024-01-05
 v23.11.3.23-stable	2023-12-21
 v23.11.2.11-stable	2023-12-13
 v23.11.1.2711-stable	2023-12-06
+v23.10.6.60-stable	2024-01-05
 v23.10.5.20-stable	2023-11-25
 v23.10.4.25-stable	2023-11-17
 v23.10.3.5-stable	2023-11-10
@@ -13,6 +16,7 @@ v23.9.4.11-stable	2023-11-08
 v23.9.3.12-stable	2023-10-31
 v23.9.2.56-stable	2023-10-19
 v23.9.1.1854-stable	2023-09-29
+v23.8.9.54-lts	2024-01-05
 v23.8.8.20-lts	2023-11-25
 v23.8.7.24-lts	2023-11-17
 v23.8.6.16-lts	2023-11-08
@@ -41,6 +45,7 @@ v23.4.4.16-stable	2023-06-17
 v23.4.3.48-stable	2023-06-12
 v23.4.2.11-stable	2023-05-02
 v23.4.1.1943-stable	2023-04-27
+v23.3.19.32-lts	2024-01-05
 v23.3.18.15-lts	2023-11-25
 v23.3.17.13-lts	2023-11-17
 v23.3.16.7-lts	2023-11-08

From 149cd477988a326d4fc84a02b772b54409ee8f95 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <var1able@var1able.ru>
Date: Fri, 5 Jan 2024 18:31:49 +0100
Subject: [PATCH 104/105] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 283000f1804..0beb6f97af5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -375,6 +375,7 @@
 * Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vise-versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)).
 * Comparison of time intervals with different units will throw an exception. This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally rely on the previous behavior when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Rewrited the experimental `S3Queue` table engine completely: changed the way we keep information in zookeeper which allows to make less zookeeper requests, added caching of zookeeper state in cases when we know the state will not change, improved the polling from s3 process to make it less aggressive, changed the way ttl and max set for trached files is maintained, now it is a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Arbitrary parts on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
 
 #### New Feature
 * Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)).

From 33c143c21f326e6846726d74d7d145b911a39e74 Mon Sep 17 00:00:00 2001
From: Konstantin Bogdanov <var1able@var1able.ru>
Date: Fri, 5 Jan 2024 18:34:57 +0100
Subject: [PATCH 105/105] Typo

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0beb6f97af5..0355b21c962 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -375,7 +375,7 @@
 * Do not interpret the `send_timeout` set on the client side as the `receive_timeout` on the server side and vise-versa. [#56035](https://github.com/ClickHouse/ClickHouse/pull/56035) ([Azat Khuzhin](https://github.com/azat)).
 * Comparison of time intervals with different units will throw an exception. This closes [#55942](https://github.com/ClickHouse/ClickHouse/issues/55942). You might have occasionally rely on the previous behavior when the underlying numeric values were compared regardless of the units. [#56090](https://github.com/ClickHouse/ClickHouse/pull/56090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Rewrited the experimental `S3Queue` table engine completely: changed the way we keep information in zookeeper which allows to make less zookeeper requests, added caching of zookeeper state in cases when we know the state will not change, improved the polling from s3 process to make it less aggressive, changed the way ttl and max set for trached files is maintained, now it is a background process. Added `system.s3queue` and `system.s3queue_log` tables. Closes [#54998](https://github.com/ClickHouse/ClickHouse/issues/54998). [#54422](https://github.com/ClickHouse/ClickHouse/pull/54422) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Arbitrary parts on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Arbitrary paths on HTTP endpoint are no longer interpreted as a request to the `/query` endpoint. [#55521](https://github.com/ClickHouse/ClickHouse/pull/55521) ([Konstantin Bogdanov](https://github.com/thevar1able)).
 
 #### New Feature
 * Add function `arrayFold(accumulator, x1, ..., xn -> expression, initial, array1, ..., arrayn)` which applies a lambda function to multiple arrays of the same cardinality and collects the result in an accumulator. [#49794](https://github.com/ClickHouse/ClickHouse/pull/49794) ([Lirikl](https://github.com/Lirikl)).