From 0d2b9fd0e5bb5ba50317260e7c3bcc2fea6f420c Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Thu, 22 Jun 2023 01:35:11 +0000
Subject: [PATCH 001/243] Add VALID UNTIL clause for users

---
 .../sql-reference/statements/create/user.md   | 11 +++
 src/Access/IAccessStorage.cpp                 |  8 +++
 src/Access/User.cpp                           |  3 +-
 src/Access/User.h                             |  1 +
 .../Access/InterpreterCreateUserQuery.cpp     | 38 ++++++++++-
 ...InterpreterShowCreateAccessEntityQuery.cpp |  8 +++
 src/Parsers/Access/ASTCreateUserQuery.cpp     |  8 +++
 src/Parsers/Access/ASTCreateUserQuery.h       |  2 +
 src/Parsers/Access/ParserCreateUserQuery.cpp  | 23 +++++++
 .../test_user_valid_until/__init__.py         |  0
 .../integration/test_user_valid_until/test.py | 68 +++++++++++++++++++
 11 files changed, 166 insertions(+), 4 deletions(-)
 create mode 100644 tests/integration/test_user_valid_until/__init__.py
 create mode 100644 tests/integration/test_user_valid_until/test.py
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index d168be63c36..b32fa8dbeb0 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -14,6 +14,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
         [, name2 [ON CLUSTER cluster_name2] ...]
     [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
     [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
+    [VALID UNTIL datetime]
     [DEFAULT ROLE role [,...]]
     [DEFAULT DATABASE database | NONE]
     [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
@@ -135,6 +136,16 @@ Another way of specifying host is to use `@` syntax following the username. Exam
 ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so.
 :::
 
+## VALID UNTIL Clause
+
+Allows you to specify the expiration date and, optionally, the time for a user. It accepts a string as a parameter. It is recommended to use the `YYYY-MM-DD [hh:mm:ss] [timezone]` format for datetime.
+
+Examples:
+
+- `CREATE USER name1 VALID UNTIL '2025-01-01'`
+- `CREATE USER name1 VALID UNTIL '2025-01-01 12:00:00 UTC'`
+- `CREATE USER name1 VALID UNTIL 'infinity'`
+
 ## GRANTEES Clause
 
 Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause:
diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp
index 9468e8d220a..cb628c3e559 100644
--- a/src/Access/IAccessStorage.cpp
+++ b/src/Access/IAccessStorage.cpp
@@ -514,6 +514,14 @@ bool IAccessStorage::areCredentialsValid(
     if (credentials.getUserName() != user.getName())
         return false;
 
+    if (user.valid_until)
+    {
+        const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
+
+        if (now > user.valid_until)
+            return false;
+    }
+
     return Authentication::areCredentialsValid(credentials, user.auth_data, external_authenticators);
 }
 
diff --git a/src/Access/User.cpp b/src/Access/User.cpp
index c5750cdcd6c..3b4055b6b1d 100644
--- a/src/Access/User.cpp
+++ b/src/Access/User.cpp
@@ -17,7 +17,8 @@ bool User::equal(const IAccessEntity & other) const
     const auto & other_user = typeid_cast<const User &>(other);
     return (auth_data == other_user.auth_data) && (allowed_client_hosts == other_user.allowed_client_hosts)
         && (access == other_user.access) && (granted_roles == other_user.granted_roles) && (default_roles == other_user.default_roles)
-        && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database);
+        && (settings == other_user.settings) && (grantees == other_user.grantees) && (default_database == other_user.default_database)
+        && (valid_until == other_user.valid_until);
 }
 
 void User::setName(const String & name_)
diff --git a/src/Access/User.h b/src/Access/User.h
index 4b4bf90137f..e4ab654dafd 100644
--- a/src/Access/User.h
+++ b/src/Access/User.h
@@ -23,6 +23,7 @@ struct User : public IAccessEntity
     SettingsProfileElements settings;
     RolesOrUsersSet grantees = RolesOrUsersSet::AllTag{};
     String default_database;
+    time_t valid_until = 0;
 
     bool equal(const IAccessEntity & other) const override;
     std::shared_ptr<IAccessEntity> clone() const override { return cloneImpl<User>(); }
diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
index 165937560cc..fa68b1adc1a 100644
--- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
+++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
@@ -10,6 +10,10 @@
 #include <Interpreters/Context.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
 #include <boost/range/algorithm/copy.hpp>
+#include <Interpreters/evaluateConstantExpression.h>
+#include <Storages/checkAndGetLiteralArgument.h>
+#include <IO/parseDateTimeBestEffort.h>
+#include <IO/ReadBufferFromString.h>
 
 
 namespace DB
@@ -28,6 +32,7 @@ namespace
         const std::optional<RolesOrUsersSet> & override_default_roles,
         const std::optional<SettingsProfileElements> & override_settings,
         const std::optional<RolesOrUsersSet> & override_grantees,
+        const std::optional<time_t> & valid_until,
         bool allow_implicit_no_password,
         bool allow_no_password,
         bool allow_plaintext_password)
@@ -61,6 +66,9 @@ namespace
             }
         }
 
+        if (valid_until)
+            user.valid_until = *valid_until;
+
         if (override_name && !override_name->host_pattern.empty())
         {
             user.allowed_client_hosts = AllowedClientHosts{};
@@ -116,6 +124,26 @@ BlockIO InterpreterCreateUserQuery::execute()
     if (query.auth_data)
         auth_data = AuthenticationData::fromAST(*query.auth_data, getContext(), !query.attach);
 
+    std::optional<time_t> valid_until;
+    if (query.valid_until)
+    {
+        const ASTPtr valid_until_literal = evaluateConstantExpressionAsLiteral(query.valid_until, getContext());
+        const String valid_until_str = checkAndGetLiteralArgument<String>(valid_until_literal, "valid_until");
+
+        time_t time = 0;
+
+        if (valid_until_str != "infinity")
+        {
+            const auto & time_zone = DateLUT::instance("");
+            const auto & utc_time_zone = DateLUT::instance("UTC");
+
+            ReadBufferFromString in(valid_until_str);
+            parseDateTimeBestEffort(time, in, time_zone, utc_time_zone);
+        }
+
+        valid_until = time;
+    }
+
     std::optional<RolesOrUsersSet> default_roles_from_query;
     if (query.default_roles)
     {
@@ -148,7 +176,9 @@ BlockIO InterpreterCreateUserQuery::execute()
         auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
         {
             auto updated_user = typeid_cast<std::shared_ptr<User>>(entity->clone());
-            updateUserFromQueryImpl(*updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed);
+            updateUserFromQueryImpl(
+                *updated_user, query, auth_data, {}, default_roles_from_query, settings_from_query, grantees_from_query,
+                valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed);
             return updated_user;
         };
 
@@ -167,7 +197,9 @@ BlockIO InterpreterCreateUserQuery::execute()
         for (const auto & name : *query.names)
         {
             auto new_user = std::make_shared<User>();
-            updateUserFromQueryImpl(*new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed);
+            updateUserFromQueryImpl(
+                *new_user, query, auth_data, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{},
+                valid_until, implicit_no_password_allowed, no_password_allowed, plaintext_password_allowed);
             new_users.emplace_back(std::move(new_user));
         }
 
@@ -201,7 +233,7 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat
     if (query.auth_data)
         auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach);
 
-    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true);
+    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true);
 }
 
 }
diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
index 7292892d3c1..ec2e60b2ef7 100644
--- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
+++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
@@ -10,6 +10,7 @@
 #include <Parsers/Access/ASTRolesOrUsersSet.h>
 #include <Parsers/Access/ASTSettingsProfileElement.h>
 #include <Parsers/Access/ASTRowPolicyName.h>
+#include <Parsers/ASTLiteral.h>
 #include <Parsers/ExpressionListParsers.h>
 #include <Parsers/formatAST.h>
 #include <Parsers/parseQuery.h>
@@ -65,6 +66,13 @@ namespace
         if (user.auth_data.getType() != AuthenticationType::NO_PASSWORD)
             query->auth_data = user.auth_data.toAST();
 
+        if (user.valid_until)
+        {
+            WriteBufferFromOwnString out;
+            writeDateTimeText(user.valid_until, out);
+            query->valid_until = std::make_shared<ASTLiteral>(out.str());
+        }
+
         if (!user.settings.empty())
         {
             if (attach_mode)
diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp
index 0611545adf0..d73d6243b8f 100644
--- a/src/Parsers/Access/ASTCreateUserQuery.cpp
+++ b/src/Parsers/Access/ASTCreateUserQuery.cpp
@@ -24,6 +24,11 @@ namespace
         auth_data.format(settings);
     }
 
+    void formatValidUntil(const IAST & valid_until, const IAST::FormatSettings & settings)
+    {
+        settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " VALID UNTIL " << (settings.hilite ? IAST::hilite_none : "");
+        valid_until.format(settings);
+    }
 
     void formatHosts(const char * prefix, const AllowedClientHosts & hosts, const IAST::FormatSettings & settings)
     {
@@ -216,6 +221,9 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState &
     if (auth_data)
         formatAuthenticationData(*auth_data, format);
 
+    if (valid_until)
+        formatValidUntil(*valid_until, format);
+
     if (hosts)
         formatHosts(nullptr, *hosts, format);
     if (add_hosts)
diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h
index 62ddbfd0040..f75d9b03de6 100644
--- a/src/Parsers/Access/ASTCreateUserQuery.h
+++ b/src/Parsers/Access/ASTCreateUserQuery.h
@@ -58,6 +58,8 @@ public:
 
     std::shared_ptr<ASTDatabaseOrNone> default_database;
 
+    ASTPtr valid_until;
+
     String getID(char) const override;
     ASTPtr clone() const override;
     void formatImpl(const FormatSettings & format, FormatState &, FormatStateStacked) const override;
diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp
index 0344fb99c04..550d9756aec 100644
--- a/src/Parsers/Access/ParserCreateUserQuery.cpp
+++ b/src/Parsers/Access/ParserCreateUserQuery.cpp
@@ -363,6 +363,19 @@ namespace
             return true;
         });
     }
+
+    bool parseValidUntil(IParserBase::Pos & pos, Expected & expected, ASTPtr & valid_until)
+    {
+        return IParserBase::wrapParseImpl(pos, [&]
+        {
+            if (!ParserKeyword{"VALID UNTIL"}.ignore(pos, expected))
+                return false;
+
+            ParserStringAndSubstitution until_p;
+
+            return until_p.parse(pos, valid_until, expected);
+        });
+    }
 }
 
 
@@ -413,6 +426,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     std::shared_ptr<ASTSettingsProfileElements> settings;
     std::shared_ptr<ASTRolesOrUsersSet> grantees;
     std::shared_ptr<ASTDatabaseOrNone> default_database;
+    ASTPtr valid_until;
     String cluster;
 
     while (true)
@@ -427,6 +441,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
             }
         }
 
+        if (!valid_until)
+        {
+            parseValidUntil(pos, expected, valid_until);
+        }
+
         AllowedClientHosts new_hosts;
         if (parseHosts(pos, expected, "", new_hosts))
         {
@@ -514,10 +533,14 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     query->settings = std::move(settings);
     query->grantees = std::move(grantees);
     query->default_database = std::move(default_database);
+    query->valid_until = std::move(valid_until);
 
     if (query->auth_data)
         query->children.push_back(query->auth_data);
 
+    if (query->valid_until)
+        query->children.push_back(query->valid_until);
+
     return true;
 }
 }
diff --git a/tests/integration/test_user_valid_until/__init__.py b/tests/integration/test_user_valid_until/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py
new file mode 100644
index 00000000000..787250e6005
--- /dev/null
+++ b/tests/integration/test_user_valid_until/test.py
@@ -0,0 +1,68 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node = cluster.add_instance("node")
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_basic(started_cluster):
+    # 1. Without VALID UNTIL
+    node.query("CREATE USER user_basic")
+
+    assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n"
+    assert node.query("SELECT 1", user="user_basic") == "1\n"
+
+    # 2. With valid VALID UNTIL
+    node.query("ALTER USER user_basic VALID UNTIL '06/11/2040 08:03:20 Z+3'")
+
+    assert (
+        node.query("SHOW CREATE USER user_basic")
+        == "CREATE USER user_basic VALID UNTIL \\'2040-11-06 05:03:20\\'\n"
+    )
+    assert node.query("SELECT 1", user="user_basic") == "1\n"
+
+    # 3. With invalid VALID UNTIL
+    node.query("ALTER USER user_basic VALID UNTIL '06/11/2010 08:03:20 Z+3'")
+
+    assert (
+        node.query("SHOW CREATE USER user_basic")
+        == "CREATE USER user_basic VALID UNTIL \\'2010-11-06 05:03:20\\'\n"
+    )
+
+    error = "Authentication failed"
+    assert error in node.query_and_get_error("SELECT 1", user="user_basic")
+
+    # 4. Reset VALID UNTIL
+    node.query("ALTER USER user_basic VALID UNTIL 'infinity'")
+
+    assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n"
+    assert node.query("SELECT 1", user="user_basic") == "1\n"
+
+
+def test_details(started_cluster):
+    # 1. Does not do anything
+    node.query("CREATE USER user_details_infinity VALID UNTIL 'infinity'")
+
+    assert (
+        node.query("SHOW CREATE USER user_details_infinity")
+        == "CREATE USER user_details_infinity\n"
+    )
+
+    # 2. Time only is not supported
+    node.query("CREATE USER user_details_time_only VALID UNTIL '22:03:40'")
+
+    assert (
+        node.query("SHOW CREATE USER user_details_time_only")
+        == "CREATE USER user_details_time_only VALID UNTIL \\'2000-01-01 22:03:40\\'\n"
+    )

From 46eda82cdcadd9432c65489ccd9a008284dab3a1 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Thu, 13 Jul 2023 19:24:30 +0800
Subject: [PATCH 002/243] new analyzer: move functions out of any

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 90 +++++++++++++++++++++++++
 src/Analyzer/Passes/AnyFunctionPass.h   | 25 +++++++
 src/Analyzer/QueryTreePassManager.cpp   |  3 +
 3 files changed, 118 insertions(+)
 create mode 100644 src/Analyzer/Passes/AnyFunctionPass.cpp
 create mode 100644 src/Analyzer/Passes/AnyFunctionPass.h

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
new file mode 100644
index 00000000000..bcec31eb851
--- /dev/null
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -0,0 +1,90 @@
+#include "AnyFunctionPass.h"
+
+#include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/IAggregateFunction.h>
+
+#include <Analyzer/InDepthQueryTreeVisitor.h>
+#include <Analyzer/FunctionNode.h>
+#include <Analyzer/ConstantNode.h>
+
+namespace DB
+{
+
+namespace
+{
+
+class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
+{
+public:
+    using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
+    using Base::Base;
+
+    void visitImpl(QueryTreeNodePtr & node)
+    {
+        if (!getSettings().optimize_move_functions_out_of_any)
+            return;
+
+        auto * function_node = node->as<FunctionNode>();
+        if (!function_node)
+            return;
+
+        auto is_any = [](const String & name) { return name == "any" || name == "anylast"; };
+
+        /// check function is any
+        auto lower_function_name = Poco::toLower(function_node->getFunctionName());
+        if (!is_any(lower_function_name))
+            return;
+
+        auto & arguments = function_node->getArguments().getNodes();
+        if (arguments.size() != 1)
+            return;
+
+        auto * inside_function_node = arguments[0]->as<FunctionNode>();
+        /// check argument is a function
+        if (!inside_function_node)
+            return;
+
+        auto & inside_arguments = inside_function_node->getArguments().getNodes();
+
+        /// case any(f())
+        if (inside_arguments.empty())
+            return;
+
+        /// checking done, rewrite function
+        bool pushed = false;
+        for (auto & inside_argument : inside_arguments)
+        {
+            if (inside_argument->as<ConstantNode>()) /// skip constant node
+                break;
+
+            AggregateFunctionProperties properties;
+            auto aggregate_function = AggregateFunctionFactory::instance().get(lower_function_name, {inside_argument->getResultType()}, {}, properties);
+
+            auto any_function = std::make_shared<FunctionNode>(lower_function_name);
+            any_function->resolveAsAggregateFunction(std::move(aggregate_function));
+            any_function->setAlias(inside_argument->getAlias());
+
+            auto & any_function_arguments = any_function->getArguments().getNodes();
+            any_function_arguments.push_back(std::move(inside_argument));
+            inside_argument = std::move(any_function);
+
+            pushed = true;
+        }
+
+        if (pushed)
+        {
+            arguments[0]->setAlias(node->getAlias());
+            node = arguments[0];
+        }
+    }
+};
+
+}
+
+void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
+{
+    AnyFunctionVisitor visitor(std::move(context));
+    visitor.visit(query_tree_node);
+}
+
+}
diff --git a/src/Analyzer/Passes/AnyFunctionPass.h b/src/Analyzer/Passes/AnyFunctionPass.h
new file mode 100644
index 00000000000..0ed83125796
--- /dev/null
+++ b/src/Analyzer/Passes/AnyFunctionPass.h
@@ -0,0 +1,25 @@
+#include <Analyzer/IQueryTreePass.h>
+
+namespace DB
+{
+
+/** Rewrite 'any' and 'anyLast' functions pushing them inside original function.
+  *
+  * Example: any(f(x, y, g(z)))
+  * Result: f(any(x), any(y), g(any(z)))
+  */
+class AnyFunctionPass final : public IQueryTreePass
+{
+public:
+    String getName() override { return "AnyFunction"; }
+
+    String getDescription() override
+    {
+        return "Rewrite 'any' and 'anyLast' functions pushing them inside original function.";
+    }
+
+    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
+
+};
+
+}
diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp
index a6da2a66615..0ccf56c96c0 100644
--- a/src/Analyzer/QueryTreePassManager.cpp
+++ b/src/Analyzer/QueryTreePassManager.cpp
@@ -42,6 +42,7 @@
 #include <Analyzer/Passes/CrossToInnerJoinPass.h>
 #include <Analyzer/Passes/ShardNumColumnToFunctionPass.h>
 #include <Analyzer/Passes/ConvertQueryToCNFPass.h>
+#include <Analyzer/Passes/AnyFunctionPass.h>
 
 namespace DB
 {
@@ -278,6 +279,8 @@ void addQueryTreePasses(QueryTreePassManager & manager)
     manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
     manager.addPass(std::make_unique<CrossToInnerJoinPass>());
     manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());
+
+    manager.addPass(std::make_unique<AnyFunctionPass>());
 }
 
 }

From f8b4bbcd23ae20b032008a4d25d2787ea4ea11f1 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 09:50:26 +0800
Subject: [PATCH 003/243] fix style

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 10 ++++------
 src/Analyzer/Passes/AnyFunctionPass.h   |  2 ++
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index bcec31eb851..1fbf3479d3d 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -28,11 +28,9 @@ public:
         if (!function_node)
             return;
 
-        auto is_any = [](const String & name) { return name == "any" || name == "anylast"; };
-
         /// check function is any
-        auto lower_function_name = Poco::toLower(function_node->getFunctionName());
-        if (!is_any(lower_function_name))
+        const auto & function_name = function_node->getFunctionName();
+        if (!(function_name == "any" || function_name == "anyLast"))
             return;
 
         auto & arguments = function_node->getArguments().getNodes();
@@ -58,9 +56,9 @@ public:
                 break;
 
             AggregateFunctionProperties properties;
-            auto aggregate_function = AggregateFunctionFactory::instance().get(lower_function_name, {inside_argument->getResultType()}, {}, properties);
+            auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, {inside_argument->getResultType()}, {}, properties);
 
-            auto any_function = std::make_shared<FunctionNode>(lower_function_name);
+            auto any_function = std::make_shared<FunctionNode>(function_name);
             any_function->resolveAsAggregateFunction(std::move(aggregate_function));
             any_function->setAlias(inside_argument->getAlias());
 
diff --git a/src/Analyzer/Passes/AnyFunctionPass.h b/src/Analyzer/Passes/AnyFunctionPass.h
index 0ed83125796..0cc65d238dd 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.h
+++ b/src/Analyzer/Passes/AnyFunctionPass.h
@@ -1,3 +1,5 @@
+#pragma once
+
 #include <Analyzer/IQueryTreePass.h>
 
 namespace DB

From eb6c1cb549e53e0b181a024c943ea4c0ef8e593e Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 16:01:17 +0800
Subject: [PATCH 004/243] add tests

---
 src/Analyzer/Passes/AnyFunctionPass.cpp       |  23 +++-
 ...3_analyzer_push_any_to_functions.reference | 124 ++++++++++++++++++
 .../02813_analyzer_push_any_to_functions.sql  |  33 +++++
 3 files changed, 175 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
 create mode 100644 tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 1fbf3479d3d..aada2d3a4a7 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -38,8 +38,10 @@ public:
             return;
 
         auto * inside_function_node = arguments[0]->as<FunctionNode>();
-        /// check argument is a function
-        if (!inside_function_node)
+
+        /// check argument is a function and can not be arrayJoin or lambda
+        if (!inside_function_node || inside_function_node->getFunctionName() == "arrayJoin"
+            || inside_function_node->getFunctionName() == "lambda")
             return;
 
         auto & inside_arguments = inside_function_node->getArguments().getNodes();
@@ -48,6 +50,12 @@ public:
         if (inside_arguments.empty())
             return;
 
+        if (rewritten.count(node.get()))
+        {
+            node = rewritten.at(node.get());
+            return;
+        }
+
         /// checking done, rewrite function
         bool pushed = false;
         for (auto & inside_argument : inside_arguments)
@@ -60,21 +68,26 @@ public:
 
             auto any_function = std::make_shared<FunctionNode>(function_name);
             any_function->resolveAsAggregateFunction(std::move(aggregate_function));
-            any_function->setAlias(inside_argument->getAlias());
 
             auto & any_function_arguments = any_function->getArguments().getNodes();
             any_function_arguments.push_back(std::move(inside_argument));
-            inside_argument = std::move(any_function);
 
+            inside_argument = std::move(any_function);
             pushed = true;
         }
 
         if (pushed)
         {
-            arguments[0]->setAlias(node->getAlias());
+            rewritten.insert({node.get(), arguments[0]});
             node = arguments[0];
         }
     }
+
+private:
+    /// After query analysis alias will be rewritten to QueryTreeNode
+    /// whose memory address is same with the original one.
+    /// So we can reuse the rewritten one.
+    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr > rewritten;
 };
 
 }
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
new file mode 100644
index 00000000000..025c04af1da
--- /dev/null
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
@@ -0,0 +1,124 @@
+-- { echoOn }
+SET optimize_move_functions_out_of_any = 1;
+EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2);
+QUERY id: 0
+  PROJECTION COLUMNS
+    any(plus(number, multiply(number, 2))) UInt64
+  PROJECTION
+    LIST id: 1, nodes: 1
+      FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64
+        ARGUMENTS
+          LIST id: 3, nodes: 2
+            FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64
+              ARGUMENTS
+                LIST id: 5, nodes: 1
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64
+              ARGUMENTS
+                LIST id: 9, nodes: 2
+                  FUNCTION id: 10, function_name: any, function_type: aggregate, result_type: UInt64
+                    ARGUMENTS
+                      LIST id: 11, nodes: 1
+                        COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+                  CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
+  JOIN TREE
+    TABLE_FUNCTION id: 7, table_function_name: numbers
+      ARGUMENTS
+        LIST id: 13, nodes: 2
+          CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8
+SELECT any(number + number * 2) FROM numbers(1, 2);
+3
+EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+QUERY id: 0
+  PROJECTION COLUMNS
+    anyLast(plus(number, multiply(number, 2))) UInt64
+  PROJECTION
+    LIST id: 1, nodes: 1
+      FUNCTION id: 2, function_name: plus, function_type: ordinary, result_type: UInt64
+        ARGUMENTS
+          LIST id: 3, nodes: 2
+            FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
+              ARGUMENTS
+                LIST id: 5, nodes: 1
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            FUNCTION id: 8, function_name: multiply, function_type: ordinary, result_type: UInt64
+              ARGUMENTS
+                LIST id: 9, nodes: 2
+                  FUNCTION id: 10, function_name: anyLast, function_type: aggregate, result_type: UInt64
+                    ARGUMENTS
+                      LIST id: 11, nodes: 1
+                        COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+                  CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8
+  JOIN TREE
+    TABLE_FUNCTION id: 7, table_function_name: numbers
+      ARGUMENTS
+        LIST id: 13, nodes: 2
+          CONSTANT id: 14, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 15, constant_value: UInt64_2, constant_value_type: UInt8
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+6
+EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+QUERY id: 0
+  PROJECTION COLUMNS
+    x UInt64
+  PROJECTION
+    LIST id: 1, nodes: 1
+      FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
+        ARGUMENTS
+          LIST id: 3, nodes: 2
+            FUNCTION id: 4, function_name: any, function_type: aggregate, result_type: UInt64
+              ARGUMENTS
+                LIST id: 5, nodes: 1
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
+  JOIN TREE
+    TABLE_FUNCTION id: 7, table_function_name: numbers
+      ARGUMENTS
+        LIST id: 9, nodes: 2
+          CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+3
+EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+QUERY id: 0
+  PROJECTION COLUMNS
+    x UInt64
+    x UInt64
+  PROJECTION
+    LIST id: 1, nodes: 2
+      FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
+        ARGUMENTS
+          LIST id: 3, nodes: 2
+            FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
+              ARGUMENTS
+                LIST id: 5, nodes: 1
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
+      FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
+        ARGUMENTS
+          LIST id: 3, nodes: 2
+            FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
+              ARGUMENTS
+                LIST id: 5, nodes: 1
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
+  JOIN TREE
+    TABLE_FUNCTION id: 7, table_function_name: numbers
+      ARGUMENTS
+        LIST id: 9, nodes: 2
+          CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6	6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
+SET optimize_move_functions_out_of_any = 0;
+SELECT any(number + number * 2) FROM numbers(1, 2);
+3
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+6
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+3
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6	6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql
new file mode 100644
index 00000000000..c9707d10fde
--- /dev/null
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.sql
@@ -0,0 +1,33 @@
+SET allow_experimental_analyzer = 1;
+
+-- { echoOn }
+SET optimize_move_functions_out_of_any = 1;
+
+EXPLAIN QUERY TREE SELECT any(number + number * 2) FROM numbers(1, 2);
+SELECT any(number + number * 2) FROM numbers(1, 2);
+
+EXPLAIN QUERY TREE SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+
+EXPLAIN QUERY TREE WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+
+EXPLAIN QUERY TREE SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
+
+
+
+SET optimize_move_functions_out_of_any = 0;
+
+SELECT any(number + number * 2) FROM numbers(1, 2);
+
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
+-- { echoOff }

From a2dce9663e488841b8407e7556a0eb55da758790 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 17:27:32 +0800
Subject: [PATCH 005/243] skip rewriting for lambda and arrayJoin

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 54 +++++++++++++++++++++----
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index aada2d3a4a7..b785df7fb05 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -5,7 +5,9 @@
 
 #include <Analyzer/InDepthQueryTreeVisitor.h>
 #include <Analyzer/FunctionNode.h>
+#include <Analyzer/LambdaNode.h>
 #include <Analyzer/ConstantNode.h>
+#include <Analyzer/ArrayJoinNode.h>
 
 namespace DB
 {
@@ -15,6 +17,39 @@ namespace
 
 class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
 {
+private:
+    bool canRewrite(const FunctionNode * function_node)
+    {
+        for (auto & argument : function_node->getArguments().getNodes())
+        {
+            /// arrayJoin() is special and should not be optimized (think about
+            /// it as a an aggregate function), otherwise wrong result will be
+            /// produced:
+            ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
+            ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
+            ///     │      0 │ []                                 │
+            ///     │      0 │ []                                 │
+            ///     └────────┴────────────────────────────────────┘
+            /// While should be:
+            ///     ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
+            ///     │      0 │ []                                      │
+            ///     └────────┴─────────────────────────────────────────┘
+            if (argument->as<LambdaNode>())
+                return false;
+
+            if (argument->as<ArrayJoinNode>())
+                return false;
+
+            if (const auto * inside_function = argument->as<FunctionNode>())
+            {
+                if (!canRewrite(inside_function))
+                    return false;
+            }
+        }
+
+        return true;
+    }
+
 public:
     using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
     using Base::Base;
@@ -24,6 +59,12 @@ public:
         if (!getSettings().optimize_move_functions_out_of_any)
             return;
 
+        if (rewritten.count(node.get()))
+        {
+            node = rewritten.at(node.get());
+            return;
+        }
+
         auto * function_node = node->as<FunctionNode>();
         if (!function_node)
             return;
@@ -40,8 +81,11 @@ public:
         auto * inside_function_node = arguments[0]->as<FunctionNode>();
 
         /// check argument is a function and can not be arrayJoin or lambda
-        if (!inside_function_node || inside_function_node->getFunctionName() == "arrayJoin"
-            || inside_function_node->getFunctionName() == "lambda")
+        if (!inside_function_node)
+            return;
+
+        /// check arguments can not contain arrayJoin or lambda
+        if (!canRewrite(inside_function_node))
             return;
 
         auto & inside_arguments = inside_function_node->getArguments().getNodes();
@@ -50,12 +94,6 @@ public:
         if (inside_arguments.empty())
             return;
 
-        if (rewritten.count(node.get()))
-        {
-            node = rewritten.at(node.get());
-            return;
-        }
-
         /// checking done, rewrite function
         bool pushed = false;
         for (auto & inside_argument : inside_arguments)

From cbd4358bac116cf7dd184b5f48978ae34c2d105e Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 17:29:33 +0800
Subject: [PATCH 006/243] fix special build error

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index b785df7fb05..6aba5a6cfae 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -59,7 +59,7 @@ public:
         if (!getSettings().optimize_move_functions_out_of_any)
             return;
 
-        if (rewritten.count(node.get()))
+        if (rewritten.contains(node.get()))
         {
             node = rewritten.at(node.get());
             return;

From 7ae0c3425f712ea0ca7bc9e5fb2daa547132e149 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 18:30:37 +0800
Subject: [PATCH 007/243] fix test error

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 6aba5a6cfae..63221a4d197 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -59,12 +59,6 @@ public:
         if (!getSettings().optimize_move_functions_out_of_any)
             return;
 
-        if (rewritten.contains(node.get()))
-        {
-            node = rewritten.at(node.get());
-            return;
-        }
-
         auto * function_node = node->as<FunctionNode>();
         if (!function_node)
             return;
@@ -94,6 +88,12 @@ public:
         if (inside_arguments.empty())
             return;
 
+        if (rewritten.contains(node.get()))
+        {
+            node = rewritten.at(node.get());
+            return;
+        }
+
         /// checking done, rewrite function
         bool pushed = false;
         for (auto & inside_argument : inside_arguments)

From f1044386ddf0beb7e7f80668a53d815377078c32 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Fri, 14 Jul 2023 19:42:56 +0800
Subject: [PATCH 008/243] fix style

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 63221a4d197..28de49cb9e9 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -20,7 +20,7 @@ class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunction
 private:
     bool canRewrite(const FunctionNode * function_node)
     {
-        for (auto & argument : function_node->getArguments().getNodes())
+        for (const auto & argument : function_node->getArguments().getNodes())
         {
             /// arrayJoin() is special and should not be optimized (think about
             /// it as a an aggregate function), otherwise wrong result will be

From 4cd6737cafb9a8c2a833fcf53905ae191c27e199 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Wed, 19 Jul 2023 15:42:47 +0800
Subject: [PATCH 009/243] little optimization

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 28de49cb9e9..f361b89f022 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -15,7 +15,7 @@ namespace DB
 namespace
 {
 
-class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
+class AnyFunctionVisitor : public InDepthQueryTreeVisitor<AnyFunctionVisitor>
 {
 private:
     bool canRewrite(const FunctionNode * function_node)
@@ -51,14 +51,11 @@ private:
     }
 
 public:
-    using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
+    using Base = InDepthQueryTreeVisitor<AnyFunctionVisitor>;
     using Base::Base;
 
     void visitImpl(QueryTreeNodePtr & node)
     {
-        if (!getSettings().optimize_move_functions_out_of_any)
-            return;
-
         auto * function_node = node->as<FunctionNode>();
         if (!function_node)
             return;
@@ -132,7 +129,10 @@ private:
 
 void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
 {
-    AnyFunctionVisitor visitor(std::move(context));
+    if (!context->getSettings().optimize_move_functions_out_of_any)
+        return;
+
+    AnyFunctionVisitor visitor;
     visitor.visit(query_tree_node);
 }
 

From 98a30d635c456469ace74bb0b09db681bdd6c672 Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Thu, 27 Jul 2023 17:46:01 +0800
Subject: [PATCH 010/243] remove rewritten

---
 src/Analyzer/Passes/AnyFunctionPass.cpp       | 58 ++++++-------------
 ...3_analyzer_push_any_to_functions.reference | 34 +++++------
 2 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index f361b89f022..5fd6beec4d8 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -1,4 +1,4 @@
-#include "AnyFunctionPass.h"
+#include <Analyzer/Passes/AnyFunctionPass.h>
 
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/IAggregateFunction.h>
@@ -7,7 +7,6 @@
 #include <Analyzer/FunctionNode.h>
 #include <Analyzer/LambdaNode.h>
 #include <Analyzer/ConstantNode.h>
-#include <Analyzer/ArrayJoinNode.h>
 
 namespace DB
 {
@@ -15,30 +14,27 @@ namespace DB
 namespace
 {
 
-class AnyFunctionVisitor : public InDepthQueryTreeVisitor<AnyFunctionVisitor>
+class AnyFunctionVisitor : public InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>
 {
 private:
     bool canRewrite(const FunctionNode * function_node)
     {
         for (const auto & argument : function_node->getArguments().getNodes())
         {
-            /// arrayJoin() is special and should not be optimized (think about
-            /// it as a an aggregate function), otherwise wrong result will be
-            /// produced:
+            if (argument->as<LambdaNode>())
+                return false;
+
+            /// Function arrayJoin is special and should be skipped (think about it as a
+            /// an aggregate function), otherwise wrong result will be produced.
+            /// For example:
             ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
             ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
             ///     │      0 │ []                                 │
             ///     │      0 │ []                                 │
             ///     └────────┴────────────────────────────────────┘
-            /// While should be:
-            ///     ┌─number─┬─any(arrayJoin(array(array(), array())))─┐
-            ///     │      0 │ []                                      │
-            ///     └────────┴─────────────────────────────────────────┘
-            if (argument->as<LambdaNode>())
-                return false;
-
-            if (argument->as<ArrayJoinNode>())
-                return false;
+            if (const auto * inside_function = argument->as<FunctionNode>())
+                if (inside_function->getFunctionName() == "arrayJoin")
+                    return false;
 
             if (const auto * inside_function = argument->as<FunctionNode>())
             {
@@ -51,11 +47,14 @@ private:
     }
 
 public:
-    using Base = InDepthQueryTreeVisitor<AnyFunctionVisitor>;
+    using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
     using Base::Base;
 
     void visitImpl(QueryTreeNodePtr & node)
     {
+        if (!getSettings().optimize_move_functions_out_of_any)
+            return;
+
         auto * function_node = node->as<FunctionNode>();
         if (!function_node)
             return;
@@ -71,7 +70,7 @@ public:
 
         auto * inside_function_node = arguments[0]->as<FunctionNode>();
 
-        /// check argument is a function and can not be arrayJoin or lambda
+        /// check argument is a function
         if (!inside_function_node)
             return;
 
@@ -85,14 +84,7 @@ public:
         if (inside_arguments.empty())
             return;
 
-        if (rewritten.contains(node.get()))
-        {
-            node = rewritten.at(node.get());
-            return;
-        }
-
         /// checking done, rewrite function
-        bool pushed = false;
         for (auto & inside_argument : inside_arguments)
         {
             if (inside_argument->as<ConstantNode>()) /// skip constant node
@@ -108,31 +100,17 @@ public:
             any_function_arguments.push_back(std::move(inside_argument));
 
             inside_argument = std::move(any_function);
-            pushed = true;
-        }
-
-        if (pushed)
-        {
-            rewritten.insert({node.get(), arguments[0]});
-            node = arguments[0];
         }
+        node = arguments[0];
     }
 
-private:
-    /// After query analysis alias will be rewritten to QueryTreeNode
-    /// whose memory address is same with the original one.
-    /// So we can reuse the rewritten one.
-    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr > rewritten;
 };
 
 }
 
 void AnyFunctionPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
 {
-    if (!context->getSettings().optimize_move_functions_out_of_any)
-        return;
-
-    AnyFunctionVisitor visitor;
+    AnyFunctionVisitor visitor(context);
     visitor.visit(query_tree_node);
 }
 
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
index 025c04af1da..3afb2cc353f 100644
--- a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
@@ -93,32 +93,26 @@ QUERY id: 0
             FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
               ARGUMENTS
                 LIST id: 5, nodes: 1
-                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
-            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
+                  FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
+                    ARGUMENTS
+                      LIST id: 7, nodes: 1
+                        COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
+            CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
       FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
         ARGUMENTS
           LIST id: 3, nodes: 2
             FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
               ARGUMENTS
                 LIST id: 5, nodes: 1
-                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
-            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
+                  FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
+                    ARGUMENTS
+                      LIST id: 7, nodes: 1
+                        COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
+            CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 7, table_function_name: numbers
+    TABLE_FUNCTION id: 9, table_function_name: numbers
       ARGUMENTS
-        LIST id: 9, nodes: 2
-          CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
-          CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
+        LIST id: 11, nodes: 2
+          CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8
 SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
-6	6
-SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
-SET optimize_move_functions_out_of_any = 0;
-SELECT any(number + number * 2) FROM numbers(1, 2);
-3
-SELECT anyLast(number + number * 2) FROM numbers(1, 2);
-6
-WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
-3
-SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
-6	6
-SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }

From 1aac8bf12933a77ab52689e2090ff2a59f7dc0bc Mon Sep 17 00:00:00 2001
From: JackyWoo <wuchienchao@qq.com>
Date: Thu, 3 Aug 2023 11:46:26 +0800
Subject: [PATCH 011/243] fix tests

---
 src/Analyzer/Passes/AnyFunctionPass.cpp       | 23 +++++++++++--
 ...3_analyzer_push_any_to_functions.reference | 34 +++++++++++--------
 2 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 5fd6beec4d8..5fa709e71cf 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -50,7 +50,7 @@ public:
     using Base = InDepthQueryTreeVisitorWithContext<AnyFunctionVisitor>;
     using Base::Base;
 
-    void visitImpl(QueryTreeNodePtr & node)
+    void enterImpl(QueryTreeNodePtr & node)
     {
         if (!getSettings().optimize_move_functions_out_of_any)
             return;
@@ -84,7 +84,14 @@ public:
         if (inside_arguments.empty())
             return;
 
+        if (rewritten.contains(node.get()))
+        {
+            node = rewritten.at(node.get());
+            return;
+        }
+
         /// checking done, rewrite function
+        bool pushed = false;
         for (auto & inside_argument : inside_arguments)
         {
             if (inside_argument->as<ConstantNode>()) /// skip constant node
@@ -100,10 +107,22 @@ public:
             any_function_arguments.push_back(std::move(inside_argument));
 
             inside_argument = std::move(any_function);
+            pushed = true;
+        }
+
+        if (pushed)
+        {
+            rewritten.insert({node.get(), arguments[0]});
+            node = arguments[0];
         }
-        node = arguments[0];
     }
 
+private:
+    /// After query analysis alias will be rewritten to QueryTreeNode
+    /// whose memory address is same with the original one.
+    /// So we can reuse the rewritten one.
+    std::unordered_map<IQueryTreeNode *, QueryTreeNodePtr > rewritten;
+
 };
 
 }
diff --git a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
index 3afb2cc353f..025c04af1da 100644
--- a/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
+++ b/tests/queries/0_stateless/02813_analyzer_push_any_to_functions.reference
@@ -93,26 +93,32 @@ QUERY id: 0
             FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
               ARGUMENTS
                 LIST id: 5, nodes: 1
-                  FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
-                    ARGUMENTS
-                      LIST id: 7, nodes: 1
-                        COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
-            CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
       FUNCTION id: 2, function_name: multiply, function_type: ordinary, result_type: UInt64
         ARGUMENTS
           LIST id: 3, nodes: 2
             FUNCTION id: 4, function_name: anyLast, function_type: aggregate, result_type: UInt64
               ARGUMENTS
                 LIST id: 5, nodes: 1
-                  FUNCTION id: 6, function_name: anyLast, function_type: aggregate, result_type: UInt64
-                    ARGUMENTS
-                      LIST id: 7, nodes: 1
-                        COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9
-            CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8
+                  COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7
+            CONSTANT id: 8, constant_value: UInt64_3, constant_value_type: UInt8
   JOIN TREE
-    TABLE_FUNCTION id: 9, table_function_name: numbers
+    TABLE_FUNCTION id: 7, table_function_name: numbers
       ARGUMENTS
-        LIST id: 11, nodes: 2
-          CONSTANT id: 12, constant_value: UInt64_1, constant_value_type: UInt8
-          CONSTANT id: 13, constant_value: UInt64_2, constant_value_type: UInt8
+        LIST id: 9, nodes: 2
+          CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
+          CONSTANT id: 11, constant_value: UInt64_2, constant_value_type: UInt8
 SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6	6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }
+SET optimize_move_functions_out_of_any = 0;
+SELECT any(number + number * 2) FROM numbers(1, 2);
+3
+SELECT anyLast(number + number * 2) FROM numbers(1, 2);
+6
+WITH any(number * 3) AS x SELECT x FROM numbers(1, 2);
+3
+SELECT anyLast(number * 3) AS x, x FROM numbers(1, 2);
+6	6
+SELECT any(anyLast(number)) FROM numbers(1); -- { serverError 184 }

From 154b3a1ae4867329206ac6a5d5ef12dc1e9d1685 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Wed, 16 Aug 2023 21:17:34 +0000
Subject: [PATCH 012/243] preserve previous steps output

---
 .../MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp         | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
index 7ca8983bfda..77ae9fa25ad 100644
--- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
+++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
@@ -239,6 +239,8 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
     };
     std::vector<Step> steps;
 
+    NameSet all_output_names;
+
     OriginalToNewNodeMap node_remap;
 
     for (const auto & condition_group : condition_groups)
@@ -285,13 +287,13 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
         }
 
         steps.push_back({step_dag, result_name});
+        all_output_names.insert(result_name);
     }
 
     /// 6. Find all outputs of the original DAG
     auto original_outputs = prewhere_info->prewhere_actions->getOutputs();
     /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed
     /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4
-    NameSet all_output_names;
     for (const auto * output : original_outputs)
     {
         all_output_names.insert(output->result_name);

From 91929fb7e9f05430733f83128488c0cc269146e9 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Thu, 17 Aug 2023 01:29:23 +0000
Subject: [PATCH 013/243] test is added

---
 .../0_stateless/02845_prewhere_preserve_column.reference | 1 +
 .../0_stateless/02845_prewhere_preserve_column.sql       | 9 +++++++++
 2 files changed, 10 insertions(+)
 create mode 100644 tests/queries/0_stateless/02845_prewhere_preserve_column.reference
 create mode 100644 tests/queries/0_stateless/02845_prewhere_preserve_column.sql

diff --git a/tests/queries/0_stateless/02845_prewhere_preserve_column.reference b/tests/queries/0_stateless/02845_prewhere_preserve_column.reference
new file mode 100644
index 00000000000..89b3f8277f1
--- /dev/null
+++ b/tests/queries/0_stateless/02845_prewhere_preserve_column.reference
@@ -0,0 +1 @@
+6	6	6
diff --git a/tests/queries/0_stateless/02845_prewhere_preserve_column.sql b/tests/queries/0_stateless/02845_prewhere_preserve_column.sql
new file mode 100644
index 00000000000..8f791d8b9ef
--- /dev/null
+++ b/tests/queries/0_stateless/02845_prewhere_preserve_column.sql
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS 02845_prewhere;
+
+SET move_all_conditions_to_prewhere = 1;
+
+CREATE TABLE  02845_prewhere  ( e String, c String, q String ) ENGINE = MergeTree ORDER BY tuple();
+
+INSERT INTO 02845_prewhere SELECT number, number, number from numbers(10);
+
+SELECT * FROM (SELECT * FROM 02845_prewhere WHERE e = '5' OR q = '6') WHERE (q = '6');

From 01cb96d39a628fa88514467340feac11b36f51e3 Mon Sep 17 00:00:00 2001
From: Dmitry Novik <n0vik@clickhouse.com>
Date: Thu, 17 Aug 2023 15:27:24 +0000
Subject: [PATCH 014/243] Leave only necessary conditions in output

---
 .../MergeTreeSplitPrewhereIntoReadSteps.cpp   | 35 +++++++++++++------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
index 77ae9fa25ad..f858cb95846 100644
--- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
+++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp
@@ -53,9 +53,15 @@ struct DAGNodeRef
 
 /// Result name -> DAGNodeRef
 using OriginalToNewNodeMap = std::unordered_map<String, DAGNodeRef>;
+using NodeNameToLastUsedStepMap = std::unordered_map<String, size_t>;
 
 /// Clones the part of original DAG responsible for computing the original_dag_node and adds it to the new DAG.
-const ActionsDAG::Node & addClonedDAGToDAG(const ActionsDAG::Node * original_dag_node, ActionsDAGPtr new_dag, OriginalToNewNodeMap & node_remap)
+const ActionsDAG::Node & addClonedDAGToDAG(
+    size_t step,
+    const ActionsDAG::Node * original_dag_node,
+    ActionsDAGPtr new_dag,
+    OriginalToNewNodeMap & node_remap,
+    NodeNameToLastUsedStepMap & node_to_step_map)
 {
     const String & node_name = original_dag_node->result_name;
     /// Look for the node in the map of already known nodes
@@ -72,6 +78,10 @@ const ActionsDAG::Node & addClonedDAGToDAG(const ActionsDAG::Node * original_dag
             node_ref.dag->addOrReplaceInOutputs(*node_ref.node);
             const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type);
             node_remap[node_name] = {new_dag, &new_node}; /// TODO: here we update the node reference. Is it always correct?
+
+            /// Remember the index of the last step which reuses this node.
+            /// We cannot remove this node from the outputs before that step.
+            node_to_step_map[node_name] = step;
             return new_node;
         }
     }
@@ -95,7 +105,7 @@ const ActionsDAG::Node & addClonedDAGToDAG(const ActionsDAG::Node * original_dag
 
     if (original_dag_node->type == ActionsDAG::ActionType::ALIAS)
     {
-        const auto & alias_child = addClonedDAGToDAG(original_dag_node->children[0], new_dag, node_remap);
+        const auto & alias_child = addClonedDAGToDAG(step, original_dag_node->children[0], new_dag, node_remap, node_to_step_map);
         const auto & new_node = new_dag->addAlias(alias_child, original_dag_node->result_name);
         node_remap[node_name] = {new_dag, &new_node};
         return new_node;
@@ -107,7 +117,7 @@ const ActionsDAG::Node & addClonedDAGToDAG(const ActionsDAG::Node * original_dag
         ActionsDAG::NodeRawConstPtrs new_children;
         for (const auto & child : original_dag_node->children)
         {
-            const auto & new_child = addClonedDAGToDAG(child, new_dag, node_remap);
+            const auto & new_child = addClonedDAGToDAG(step, child, new_dag, node_remap, node_to_step_map);
             new_children.push_back(&new_child);
         }
 
@@ -239,19 +249,19 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
     };
     std::vector<Step> steps;
 
-    NameSet all_output_names;
-
     OriginalToNewNodeMap node_remap;
+    NodeNameToLastUsedStepMap node_to_step;
 
-    for (const auto & condition_group : condition_groups)
+    for (size_t step_index = 0; step_index < condition_groups.size(); ++step_index)
     {
+        const auto & condition_group = condition_groups[step_index];
         ActionsDAGPtr step_dag = std::make_shared<ActionsDAG>();
         String result_name;
 
         std::vector<const ActionsDAG::Node *> new_condition_nodes;
         for (const auto * node : condition_group)
         {
-            const auto & node_in_new_dag = addClonedDAGToDAG(node, step_dag, node_remap);
+            const auto & node_in_new_dag = addClonedDAGToDAG(step_index, node, step_dag, node_remap, node_to_step);
             new_condition_nodes.push_back(&node_in_new_dag);
         }
 
@@ -287,13 +297,13 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
         }
 
         steps.push_back({step_dag, result_name});
-        all_output_names.insert(result_name);
     }
 
     /// 6. Find all outputs of the original DAG
     auto original_outputs = prewhere_info->prewhere_actions->getOutputs();
     /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed
     /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4
+    NameSet all_output_names;
     for (const auto * output : original_outputs)
     {
         all_output_names.insert(output->result_name);
@@ -323,21 +333,24 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction
         }
         else
         {
-            const auto & node_in_new_dag = addClonedDAGToDAG(output, steps.back().actions, node_remap);
+            const auto & node_in_new_dag = addClonedDAGToDAG(steps.size() - 1, output, steps.back().actions, node_remap, node_to_step);
             steps.back().actions->addOrReplaceInOutputs(node_in_new_dag);
         }
     }
 
     /// 9. Build PrewhereExprInfo
     {
-        for (const auto & step : steps)
+        for (size_t step_index = 0; step_index < steps.size(); ++step_index)
         {
+            const auto & step = steps[step_index];
             PrewhereExprStep new_step
             {
                 .type = PrewhereExprStep::Filter,
                 .actions = std::make_shared<ExpressionActions>(step.actions, actions_settings),
                 .filter_column_name = step.column_name,
-                .remove_filter_column = !all_output_names.contains(step.column_name), /// Don't remove if it's in the list of original outputs
+                /// Don't remove if it's in the list of original outputs
+                .remove_filter_column =
+                    !all_output_names.contains(step.column_name) && node_to_step[step.column_name] <= step_index,
                 .need_filter = false,
                 .perform_alter_conversions = true,
             };

From 3fe2e1084bb3a7cc0def8b9b720b32455d2f10e7 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Tue, 5 Sep 2023 22:25:34 +0000
Subject: [PATCH 015/243] test_read_equally_from_each_replica +
 prefer_localhost_replica=1

---
 .../test.py                                           | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index c9b424c74d4..80104d77a9f 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -95,7 +95,14 @@ def create_tables(cluster, table_name):
     return "60\t0\t59\t1770\n"
 
 
-def test_read_equally_from_each_replica(start_cluster):
+@pytest.mark.parametrize(
+    "prefer_localhost_replica",
+    [
+        pytest.param(0),
+        pytest.param(1),
+    ]
+)
+def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica):
     """create and populate table in special way (see create_table()),
     so parallel replicas will read equal number of rows from each replica
     """
@@ -110,7 +117,7 @@ def test_read_equally_from_each_replica(start_cluster):
             f"SELECT count(), min(key), max(key), sum(key) FROM {table_name}_d",
             settings={
                 "allow_experimental_parallel_reading_from_replicas": 2,
-                "prefer_localhost_replica": 0,
+                "prefer_localhost_replica": prefer_localhost_replica,
                 "max_parallel_replicas": 3,
                 "use_hedged_requests": 0,
             },

From 87a59e7084696d69871d8874c8a9a4ce66fa353d Mon Sep 17 00:00:00 2001
From: robot-clickhouse <robot-clickhouse@users.noreply.github.com>
Date: Tue, 5 Sep 2023 23:12:35 +0000
Subject: [PATCH 016/243] Automatic style fix

---
 .../test_parallel_replicas_distributed_read_from_all/test.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
index 80104d77a9f..482192668f4 100644
--- a/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
+++ b/tests/integration/test_parallel_replicas_distributed_read_from_all/test.py
@@ -100,7 +100,7 @@ def create_tables(cluster, table_name):
     [
         pytest.param(0),
         pytest.param(1),
-    ]
+    ],
 )
 def test_read_equally_from_each_replica(start_cluster, prefer_localhost_replica):
     """create and populate table in special way (see create_table()),

From 9881e75c05142813f2195997a23d6566c61bb075 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 6 Sep 2023 11:28:52 +0000
Subject: [PATCH 017/243] Create read from remote step if there are remote
 replicas

---
 src/Interpreters/Cluster.cpp                          | 8 +-------
 src/Interpreters/Cluster.h                            | 1 -
 src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 8 +++++++-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp
index 891586d88b6..bb161702401 100644
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@@ -420,8 +420,6 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
             if (address.is_local)
                 info.local_addresses.push_back(address);
 
-            info.all_addresses.push_back(address);
-
             auto pool = ConnectionPoolFactory::instance().get(
                 static_cast<unsigned>(settings.distributed_connections_pool_size),
                 address.host_name, address.port,
@@ -564,7 +562,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t
                        ShardInfoInsertPathForInternalReplication && insert_paths, UInt32 weight, bool internal_replication)
 {
     Addresses shard_local_addresses;
-    Addresses shard_all_addresses;
 
     ConnectionPoolPtrs all_replicas_pools;
     all_replicas_pools.reserve(addresses.size());
@@ -582,7 +579,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t
         all_replicas_pools.emplace_back(replica_pool);
         if (replica.is_local && !treat_local_as_remote)
             shard_local_addresses.push_back(replica);
-        shard_all_addresses.push_back(replica);
     }
     ConnectionPoolWithFailoverPtr shard_pool = std::make_shared<ConnectionPoolWithFailover>(
         all_replicas_pools, settings.load_balancing,
@@ -596,7 +592,6 @@ void Cluster::addShard(const Settings & settings, Addresses && addresses, bool t
         current_shard_num,
         weight,
         std::move(shard_local_addresses),
-        std::move(shard_all_addresses),
         std::move(shard_pool),
         std::move(all_replicas_pools),
         internal_replication
@@ -647,6 +642,7 @@ void Cluster::initMisc()
 
 std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const
 {
+    LOG_DEBUG(&Poco::Logger::get(__FUNCTION__), "max_replicas_from_shard={}\n{}", max_replicas_from_shard, StackTrace().toString());
     return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)};
 }
 
@@ -720,8 +716,6 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
                 if (address.is_local)
                     info.local_addresses.push_back(address);
 
-                info.all_addresses.push_back(address);
-
                 auto pool = ConnectionPoolFactory::instance().get(
                     static_cast<unsigned>(settings.distributed_connections_pool_size),
                     address.host_name,
diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h
index cb75487cbbc..b2bc03dd74d 100644
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@@ -217,7 +217,6 @@ public:
         UInt32 shard_num = 0;
         UInt32 weight = 1;
         Addresses local_addresses;
-        Addresses all_addresses;
         /// nullptr if there are no remote addresses
         ConnectionPoolWithFailoverPtr pool;
         /// Connection pool for each replica, contains nullptr for local replicas
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index da716d57f88..09a7bc41d33 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -178,6 +178,12 @@ void SelectStreamFactory::createForShard(
             return;
         }
 
+        if (shard_info.hasRemoteConnections())
+        {
+            emplace_remote_stream();
+            return;
+        }
+
         const auto * replicated_storage = dynamic_cast<const StorageReplicatedMergeTree *>(main_table_storage.get());
 
         if (!replicated_storage)
@@ -187,7 +193,7 @@ void SelectStreamFactory::createForShard(
             return;
         }
 
-        UInt64 max_allowed_delay = settings.max_replica_delay_for_distributed_queries;
+        const UInt64 max_allowed_delay = settings.max_replica_delay_for_distributed_queries;
 
         if (!max_allowed_delay)
         {

From 220dc339377de870bf44e7777ca59cbeac1df2fb Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 6 Sep 2023 18:46:24 +0000
Subject: [PATCH 018/243] Fast fix

---
 src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 09a7bc41d33..7c6c93f5dde 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -146,7 +146,9 @@ void SelectStreamFactory::createForShard(
         return;
     });
 
-    if (settings.prefer_localhost_replica && shard_info.isLocal())
+    if (settings.prefer_localhost_replica && shard_info.isLocal()
+        && !context->canUseParallelReplicasOnInitiator())   // fast fix for parallel replicas over distributed with enabled perfer_localhost_replica
+                                                            // basically, prefer_localhost_replica is disabled for now with parallel replicas over distributed
     {
         StoragePtr main_table_storage;
 
@@ -178,12 +180,6 @@ void SelectStreamFactory::createForShard(
             return;
         }
 
-        if (shard_info.hasRemoteConnections())
-        {
-            emplace_remote_stream();
-            return;
-        }
-
         const auto * replicated_storage = dynamic_cast<const StorageReplicatedMergeTree *>(main_table_storage.get());
 
         if (!replicated_storage)

From 96657ba5a22816755c3e53ff8a9cdc93009971a5 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 6 Sep 2023 18:49:17 +0000
Subject: [PATCH 019/243] Remove debug code

---
 src/Interpreters/Cluster.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp
index bb161702401..82c3d48bc05 100644
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@@ -642,7 +642,6 @@ void Cluster::initMisc()
 
 std::unique_ptr<Cluster> Cluster::getClusterWithReplicasAsShards(const Settings & settings, size_t max_replicas_from_shard) const
 {
-    LOG_DEBUG(&Poco::Logger::get(__FUNCTION__), "max_replicas_from_shard={}\n{}", max_replicas_from_shard, StackTrace().toString());
     return std::unique_ptr<Cluster>{ new Cluster(ReplicasAsShardsTag{}, *this, settings, max_replicas_from_shard)};
 }
 

From 6c3ac83c07bfd03ab289b1ae8522cb71c221b8e2 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 6 Sep 2023 20:08:05 +0000
Subject: [PATCH 020/243] Fix

---
 src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index ada2c1679da..32625ec0ca4 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -147,8 +147,8 @@ void SelectStreamFactory::createForShard(
     });
 
     if (settings.prefer_localhost_replica && shard_info.isLocal()
-        && !context->canUseParallelReplicasOnInitiator())   // fast fix for parallel replicas over distributed with enabled perfer_localhost_replica
-                                                            // basically, prefer_localhost_replica is disabled for now with parallel replicas over distributed
+        // fast fix for parallel replicas over distributed with enabled perfer_localhost_replica, -> disable it for now
+        && (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1))
     {
         StoragePtr main_table_storage;
 

From 9c61ceb05100c968ca9665565e91e1007f66802a Mon Sep 17 00:00:00 2001
From: Petr Vasilev <vahpetr@gmail.com>
Date: Thu, 7 Sep 2023 13:59:32 +0300
Subject: [PATCH 021/243] Fix nats high cpu usage

---
 src/Storages/NATS/NATSConsumer.cpp | 5 +++++
 src/Storages/NATS/NATSConsumer.h   | 2 ++
 src/Storages/NATS/NATSHandler.cpp  | 2 +-
 src/Storages/NATS/StorageNATS.cpp  | 4 +++-
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/Storages/NATS/NATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp
index c7b40973b72..61fda8ffc0f 100644
--- a/src/Storages/NATS/NATSConsumer.cpp
+++ b/src/Storages/NATS/NATSConsumer.cpp
@@ -66,6 +66,8 @@ void NATSConsumer::unsubscribe()
 
 ReadBufferPtr NATSConsumer::consume()
 {
+    std::unique_lock<std::mutex> lock(received_mutex);
+    received_cv.wait(lock, [this] { return stopped || !received.empty(); });
     if (stopped || !received.tryPop(current))
         return nullptr;
 
@@ -86,8 +88,11 @@ void NATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, vo
             .message = message_received,
             .subject = subject,
         };
+
+        std::unique_lock<std::mutex> lock(nats_consumer->received_mutex);
         if (!nats_consumer->received.push(std::move(data)))
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to received queue");
+        nats_consumer->received_cv.notify_one();
 
         nats_consumer->storage.startStreaming();
     }
diff --git a/src/Storages/NATS/NATSConsumer.h b/src/Storages/NATS/NATSConsumer.h
index a6f950329aa..b2c29ff949c 100644
--- a/src/Storages/NATS/NATSConsumer.h
+++ b/src/Storages/NATS/NATSConsumer.h
@@ -57,6 +57,8 @@ private:
     StorageNATS & storage;
     std::vector<SubscriptionPtr> subscriptions;
     std::vector<String> subjects;
+    std::mutex received_mutex;
+    std::condition_variable received_cv;
     Poco::Logger * log;
     const std::atomic<bool> & stopped;
 
diff --git a/src/Storages/NATS/NATSHandler.cpp b/src/Storages/NATS/NATSHandler.cpp
index 7006e5633a9..f73b338380b 100644
--- a/src/Storages/NATS/NATSHandler.cpp
+++ b/src/Storages/NATS/NATSHandler.cpp
@@ -43,7 +43,7 @@ void NATSHandler::startLoop()
 
     while (loop_state.load() == Loop::RUN && duration.count() < MAX_THREAD_WORK_DURATION_MS)
     {
-        uv_run(loop, UV_RUN_NOWAIT);
+        uv_run(loop, UV_RUN_DEFAULT);
         end_time = std::chrono::steady_clock::now();
         duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
     }
diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp
index a3478069356..a0827691d78 100644
--- a/src/Storages/NATS/StorageNATS.cpp
+++ b/src/Storages/NATS/StorageNATS.cpp
@@ -108,6 +108,7 @@ StorageNATS::StorageNATS(
             }
 
             LOG_DEBUG(log, "Connect attempt #{} failed, error: {}. Reconnecting...", i + 1, nats_GetLastError(nullptr));
+            std::this_thread::sleep_for(std::chrono::milliseconds(configuration.reconnect_wait));
         }
     }
     catch (...)
@@ -586,8 +587,9 @@ void StorageNATS::streamingToViewsFunc()
                 if (streamToViews())
                 {
                     /// Reschedule with backoff.
-                    do_reschedule = false;
                     break;
+                } else {
+                    do_reschedule = false;
                 }
 
                 auto end_time = std::chrono::steady_clock::now();

From c0333263a097db1d067005928da909ab097aa2c1 Mon Sep 17 00:00:00 2001
From: Petr Vasilev <vahpetr@gmail.com>
Date: Thu, 7 Sep 2023 15:07:21 +0300
Subject: [PATCH 022/243] Rollback do_reschedule flag

---
 src/Storages/NATS/StorageNATS.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp
index a0827691d78..36ef302fa49 100644
--- a/src/Storages/NATS/StorageNATS.cpp
+++ b/src/Storages/NATS/StorageNATS.cpp
@@ -587,9 +587,8 @@ void StorageNATS::streamingToViewsFunc()
                 if (streamToViews())
                 {
                     /// Reschedule with backoff.
-                    break;
-                } else {
                     do_reschedule = false;
+                    break;
                 }
 
                 auto end_time = std::chrono::steady_clock::now();

From 4ef241445f01edb00c90ec75701e292f7edbb7c0 Mon Sep 17 00:00:00 2001
From: Petr Vasilev <vahpetr@gmail.com>
Date: Thu, 7 Sep 2023 18:44:18 +0300
Subject: [PATCH 023/243] Remove received_mutex and received_cv

---
 src/Storages/NATS/NATSConsumer.cpp | 4 ----
 src/Storages/NATS/NATSConsumer.h   | 2 --
 2 files changed, 6 deletions(-)

diff --git a/src/Storages/NATS/NATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp
index 61fda8ffc0f..4abe960d32c 100644
--- a/src/Storages/NATS/NATSConsumer.cpp
+++ b/src/Storages/NATS/NATSConsumer.cpp
@@ -66,8 +66,6 @@ void NATSConsumer::unsubscribe()
 
 ReadBufferPtr NATSConsumer::consume()
 {
-    std::unique_lock<std::mutex> lock(received_mutex);
-    received_cv.wait(lock, [this] { return stopped || !received.empty(); });
     if (stopped || !received.tryPop(current))
         return nullptr;
 
@@ -89,10 +87,8 @@ void NATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, vo
             .subject = subject,
         };
 
-        std::unique_lock<std::mutex> lock(nats_consumer->received_mutex);
         if (!nats_consumer->received.push(std::move(data)))
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to received queue");
-        nats_consumer->received_cv.notify_one();
 
         nats_consumer->storage.startStreaming();
     }
diff --git a/src/Storages/NATS/NATSConsumer.h b/src/Storages/NATS/NATSConsumer.h
index b2c29ff949c..a6f950329aa 100644
--- a/src/Storages/NATS/NATSConsumer.h
+++ b/src/Storages/NATS/NATSConsumer.h
@@ -57,8 +57,6 @@ private:
     StorageNATS & storage;
     std::vector<SubscriptionPtr> subscriptions;
     std::vector<String> subjects;
-    std::mutex received_mutex;
-    std::condition_variable received_cv;
     Poco::Logger * log;
     const std::atomic<bool> & stopped;
 

From cb969aa54ad7213fade9e6b23e9534b097c8017f Mon Sep 17 00:00:00 2001
From: Petr Vasilev <vahpetr@gmail.com>
Date: Thu, 7 Sep 2023 18:47:31 +0300
Subject: [PATCH 024/243] Remove empty line

---
 src/Storages/NATS/NATSConsumer.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/NATS/NATSConsumer.cpp b/src/Storages/NATS/NATSConsumer.cpp
index 4abe960d32c..c7b40973b72 100644
--- a/src/Storages/NATS/NATSConsumer.cpp
+++ b/src/Storages/NATS/NATSConsumer.cpp
@@ -86,7 +86,6 @@ void NATSConsumer::onMsg(natsConnection *, natsSubscription *, natsMsg * msg, vo
             .message = message_received,
             .subject = subject,
         };
-
         if (!nats_consumer->received.push(std::move(data)))
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not push to received queue");
 

From 52dfaa54bf4cbba46ed45c6b3ec85f14259ee91c Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Fri, 8 Sep 2023 08:02:30 +0000
Subject: [PATCH 025/243] pass http retry timeout as ms

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/IO/HTTPCommon.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp
index add3e96c2c1..ef03acdde5d 100644
--- a/src/IO/HTTPCommon.cpp
+++ b/src/IO/HTTPCommon.cpp
@@ -321,7 +321,7 @@ namespace
             /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`.
             lock.unlock();
 
-            auto retry_timeout = timeouts.connection_timeout.totalMicroseconds();
+            auto retry_timeout = timeouts.connection_timeout.totalMilliseconds();
             auto session = pool_ptr->second->get(retry_timeout);
 
             setTimeouts(*session, timeouts);

From ffa82e9297c1d01f77793cc7b43aa4fb7bbec9c4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 8 Sep 2023 16:58:13 +0200
Subject: [PATCH 026/243] Fix filtering parts with indexHint for non analyzer

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp           | 5 ++++-
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp   | 3 +++
 tests/analyzer_tech_debt.txt                             | 1 +
 .../0_stateless/02880_indexHint__partition_id.reference  | 9 +++++++++
 .../0_stateless/02880_indexHint__partition_id.sql        | 9 +++++++++
 5 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.reference
 create mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.sql

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 4b95b74c3af..32802d5fa02 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1271,7 +1271,10 @@ static void buildIndexes(
     }
 
     /// TODO Support row_policy_filter and additional_filters
-    indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
+    if (settings.allow_experimental_analyzer)
+        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
+    else
+        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context);
 
     indexes->use_skip_indexes = settings.use_skip_indexes;
     bool final = query_info.isFinal();
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 31aa2dbb61f..9c93ecb14c9 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -814,6 +814,9 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
     ASTPtr expression_ast;
     auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
+    if (virtual_columns_block.rows() == 0)
+        return {};
+
     // Generate valid expressions for filtering
     VirtualColumnUtils::prepareFilterBlockWithQuery(query, context, virtual_columns_block, expression_ast);
 
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 4419190e12c..652ab0b99de 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -54,6 +54,7 @@
 01710_projection_additional_filters
 01721_join_implicit_cast_long
 01739_index_hint
+02880_indexHint__partition_id
 01747_join_view_filter_dictionary
 01748_partition_id_pruning
 01756_optimize_skip_unused_shards_rewrite_in
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.reference b/tests/queries/0_stateless/02880_indexHint__partition_id.reference
new file mode 100644
index 00000000000..365e7b676c7
--- /dev/null
+++ b/tests/queries/0_stateless/02880_indexHint__partition_id.reference
@@ -0,0 +1,9 @@
+-- { echoOn }
+select * from data prewhere indexHint(_partition_id = '1');
+1
+select count() from data prewhere indexHint(_partition_id = '1');
+1
+select * from data where indexHint(_partition_id = '1');
+1
+select count() from data where indexHint(_partition_id = '1');
+1
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.sql b/tests/queries/0_stateless/02880_indexHint__partition_id.sql
new file mode 100644
index 00000000000..d15b3f4ccea
--- /dev/null
+++ b/tests/queries/0_stateless/02880_indexHint__partition_id.sql
@@ -0,0 +1,9 @@
+drop table if exists data;
+create table data (part Int) engine=MergeTree() order by tuple() partition by part;
+insert into data values (1)(2);
+
+-- { echoOn }
+select * from data prewhere indexHint(_partition_id = '1');
+select count() from data prewhere indexHint(_partition_id = '1');
+select * from data where indexHint(_partition_id = '1');
+select count() from data where indexHint(_partition_id = '1');

From 3a78ba6de4acd52ea22f6a9ea6757f7b740344a7 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sat, 9 Sep 2023 21:36:39 +0000
Subject: [PATCH 027/243] Fix, enable tests, make cluster_for_parallel_replicas
 empty by default

---
 src/Core/Settings.h                                |  2 +-
 .../ClusterProxy/SelectStreamFactory.cpp           |  7 +++----
 .../ClusterProxy/SelectStreamFactory.h             |  3 ++-
 src/Interpreters/ClusterProxy/executeQuery.cpp     | 14 ++++++++++----
 .../test.py                                        |  8 ++++++++
 5 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 620cc8fd67f..3c94c792ef4 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -169,7 +169,7 @@ class IColumn;
     M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
     M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
     \
-    M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \
+    M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
     M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
     M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
     M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 32625ec0ca4..2f598fda3a9 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -114,7 +114,8 @@ void SelectStreamFactory::createForShard(
     ContextPtr context,
     std::vector<QueryPlanPtr> & local_plans,
     Shards & remote_shards,
-    UInt32 shard_count)
+    UInt32 shard_count,
+    bool parallel_replicas_enabled)
 {
     auto it = objects_by_shard.find(shard_info.shard_num);
     if (it != objects_by_shard.end())
@@ -146,9 +147,7 @@ void SelectStreamFactory::createForShard(
         return;
     });
 
-    if (settings.prefer_localhost_replica && shard_info.isLocal()
-        // fast fix for parallel replicas over distributed with enabled perfer_localhost_replica, -> disable it for now
-        && (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1))
+    if (settings.prefer_localhost_replica && shard_info.isLocal() && !parallel_replicas_enabled)
     {
         StoragePtr main_table_storage;
 
diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
index ca07fd5deda..a821730657d 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h
@@ -78,7 +78,8 @@ public:
         ContextPtr context,
         std::vector<QueryPlanPtr> & local_plans,
         Shards & remote_shards,
-        UInt32 shard_count);
+        UInt32 shard_count,
+        bool parallel_replicas_enabled);
 
     struct ShardPlans
     {
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 34be2636917..12d98f44ec8 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -178,8 +178,9 @@ void executeQuery(
                                                 main_table, query_info.additional_filter_ast, log);
     new_context->increaseDistributedDepth();
 
-    size_t shards = query_info.getCluster()->getShardCount();
-    for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
+    ClusterPtr cluster = query_info.getCluster();
+    const size_t shards = cluster->getShardCount();
+    for (const auto & shard_info : cluster->getShardsInfo())
     {
         ASTPtr query_ast_for_shard = query_ast->clone();
         if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
@@ -210,9 +211,14 @@ void executeQuery(
             }
         }
 
+        const auto & addresses = cluster->getShardsAddresses().at(shard_info.shard_num - 1);
+        const bool parallel_replicas_enabled = addresses.size() > 1
+            && context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1;
+
         stream_factory.createForShard(shard_info,
             query_ast_for_shard, main_table, table_func_ptr,
-            new_context, plans, remote_shards, static_cast<UInt32>(shards));
+            new_context, plans, remote_shards, static_cast<UInt32>(shards),
+            parallel_replicas_enabled);
     }
 
     if (!remote_shards.empty())
@@ -236,7 +242,7 @@ void executeQuery(
             log,
             shards,
             query_info.storage_limits,
-            query_info.getCluster()->getName());
+            not_optimized_cluster->getName());
 
         read_from_remote->setStepDescription("Read from remote replica");
         plan->addStep(std::move(read_from_remote));
diff --git a/tests/integration/test_parallel_replicas_over_distributed/test.py b/tests/integration/test_parallel_replicas_over_distributed/test.py
index 50db95c90b2..5716a33aa09 100644
--- a/tests/integration/test_parallel_replicas_over_distributed/test.py
+++ b/tests/integration/test_parallel_replicas_over_distributed/test.py
@@ -106,10 +106,18 @@ def create_tables(cluster, table_name):
         pytest.param("test_single_shard_multiple_replicas", 3, 0),
         pytest.param("test_single_shard_multiple_replicas", 4, 0),
         pytest.param("test_single_shard_multiple_replicas", 10, 0),
+        pytest.param("test_single_shard_multiple_replicas", 2, 1),
+        pytest.param("test_single_shard_multiple_replicas", 3, 1),
+        pytest.param("test_single_shard_multiple_replicas", 4, 1),
+        pytest.param("test_single_shard_multiple_replicas", 10, 1),
         pytest.param("test_multiple_shards_multiple_replicas", 2, 0),
         pytest.param("test_multiple_shards_multiple_replicas", 3, 0),
         pytest.param("test_multiple_shards_multiple_replicas", 4, 0),
         pytest.param("test_multiple_shards_multiple_replicas", 10, 0),
+        pytest.param("test_multiple_shards_multiple_replicas", 2, 1),
+        pytest.param("test_multiple_shards_multiple_replicas", 3, 1),
+        pytest.param("test_multiple_shards_multiple_replicas", 4, 1),
+        pytest.param("test_multiple_shards_multiple_replicas", 10, 1),
     ],
 )
 def test_parallel_replicas_over_distributed(

From fac0b2f62dc95f776ee6ad174c3070005ae47009 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sun, 10 Sep 2023 19:05:17 +0000
Subject: [PATCH 028/243] Adapt code to inconsistency between shard_info and
 shard addresses

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 12d98f44ec8..401eea7c39a 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -211,9 +211,19 @@ void executeQuery(
             }
         }
 
-        const auto & addresses = cluster->getShardsAddresses().at(shard_info.shard_num - 1);
-        const bool parallel_replicas_enabled = addresses.size() > 1
-            && context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1;
+        bool parallel_replicas_enabled = false;
+        if (shard_info.shard_num > 0 && shard_info.shard_num <= cluster->getShardsAddresses().size())
+        {
+            const auto & addresses = cluster->getShardsAddresses().at(shard_info.shard_num - 1);
+            parallel_replicas_enabled = addresses.size() > 1
+                && context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1;
+        }
+        else
+        {
+            chassert(shard_info.shard_num > 0);
+
+            // FIXME or code: when can it happened (shard_num bigger than shard's addresses)? looks inconsistent
+        }
 
         stream_factory.createForShard(shard_info,
             query_ast_for_shard, main_table, table_func_ptr,

From 7d630b6b1bee7839d295eb5a548e17e94012fb93 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sun, 10 Sep 2023 20:38:17 +0000
Subject: [PATCH 029/243] Added comments

---
 src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 3 +++
 src/Interpreters/ClusterProxy/executeQuery.cpp        | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 2f598fda3a9..3935028f27c 100644
--- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -147,6 +147,9 @@ void SelectStreamFactory::createForShard(
         return;
     });
 
+    // prefer_localhost_replica is not effective in case of parallel replicas
+    // (1) prefer_localhost_replica is about choosing one replica on a shard
+    // (2) parallel replica coordinator has own logic to choose replicas to read from
     if (settings.prefer_localhost_replica && shard_info.isLocal() && !parallel_replicas_enabled)
     {
         StoragePtr main_table_storage;
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 401eea7c39a..bd8f5cea7f3 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -211,6 +211,8 @@ void executeQuery(
             }
         }
 
+        // decide for each shard if parallel reading from replicas should be enabled
+        // according to settings and number of replicas declared per shard
         bool parallel_replicas_enabled = false;
         if (shard_info.shard_num > 0 && shard_info.shard_num <= cluster->getShardsAddresses().size())
         {

From 918bd814bd4a274675dc42b12231795a5f582e31 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Sep 2023 12:19:07 +0000
Subject: [PATCH 030/243] Add canUseParallelReplicas() for convenience

---
 src/Interpreters/ClusterProxy/executeQuery.cpp |  3 +--
 src/Interpreters/Context.cpp                   | 12 ++++++++----
 src/Interpreters/Context.h                     |  1 +
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index bd8f5cea7f3..4395f8373b2 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -217,8 +217,7 @@ void executeQuery(
         if (shard_info.shard_num > 0 && shard_info.shard_num <= cluster->getShardsAddresses().size())
         {
             const auto & addresses = cluster->getShardsAddresses().at(shard_info.shard_num - 1);
-            parallel_replicas_enabled = addresses.size() > 1
-                && context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS && settings.max_parallel_replicas > 1;
+            parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas();
         }
         else
         {
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index ee7efdfeb1b..7834f5893d3 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -4626,18 +4626,22 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
     return SAMPLE_KEY;
 }
 
+bool Context::canUseParallelReplicas() const
+{
+    const auto & settings_ref = getSettingsRef();
+    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1;
+}
+
 bool Context::canUseParallelReplicasOnInitiator() const
 {
     const auto & settings_ref = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
-        && !getClientInfo().collaborate_with_initiator;
+    return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator;
 }
 
 bool Context::canUseParallelReplicasOnFollower() const
 {
     const auto & settings_ref = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
-        && getClientInfo().collaborate_with_initiator;
+    return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator;
 }
 
 void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache)
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index b4a5b3d8c85..e2b9c930249 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -1174,6 +1174,7 @@ public:
     WriteSettings getWriteSettings() const;
 
     /** There are multiple conditions that have to be met to be able to use parallel replicas */
+    bool canUseParallelReplicas() const;
     bool canUseParallelReplicasOnInitiator() const;
     bool canUseParallelReplicasOnFollower() const;
 

From 1785e1c00c0987f8f1cf5ccff23db04fe18e7009 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Sep 2023 12:30:55 +0000
Subject: [PATCH 031/243] Fix build: remove unused variables

---
 src/Interpreters/Context.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 7834f5893d3..526a644ce6f 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -4634,13 +4634,11 @@ bool Context::canUseParallelReplicas() const
 
 bool Context::canUseParallelReplicasOnInitiator() const
 {
-    const auto & settings_ref = getSettingsRef();
     return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator;
 }
 
 bool Context::canUseParallelReplicasOnFollower() const
 {
-    const auto & settings_ref = getSettingsRef();
     return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator;
 }
 

From d86d92fa2df676776334c4a41ce7a9752056d2b8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <nk@clickhouse.com>
Date: Mon, 11 Sep 2023 12:58:04 +0000
Subject: [PATCH 032/243] Flush logs for system.backup_log test.

---
 tests/integration/test_backup_log/test.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/integration/test_backup_log/test.py b/tests/integration/test_backup_log/test.py
index a1c09d8e091..deb1979b075 100644
--- a/tests/integration/test_backup_log/test.py
+++ b/tests/integration/test_backup_log/test.py
@@ -33,10 +33,13 @@ def restore_table(backup_name):
 
 
 def test_backup_log():
+    instance.query("SYSTEM FLUSH LOGS")
+
     backup_name = "File('/backups/test_backup/')"
     assert instance.query("SELECT * FROM system.tables WHERE name = 'backup_log'") == ""
 
     backup_id = backup_table(backup_name)
+    instance.query("SYSTEM FLUSH LOGS")
     assert instance.query(
         f"SELECT status, error FROM system.backup_log WHERE id='{backup_id}' ORDER BY event_date, event_time_microseconds"
     ) == TSV([["CREATING_BACKUP", ""], ["BACKUP_CREATED", ""]])
@@ -44,6 +47,7 @@ def test_backup_log():
     instance.query("DROP TABLE test.table SYNC")
 
     restore_id = restore_table(backup_name)
+    instance.query("SYSTEM FLUSH LOGS")
     assert instance.query(
         f"SELECT status, error FROM system.backup_log WHERE id='{restore_id}' ORDER BY event_date, event_time_microseconds"
     ) == TSV([["RESTORING", ""], ["RESTORED", ""]])

From 2293923f66e1b2d0ccda674c2102b102a6c18944 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 11 Sep 2023 21:46:46 +0000
Subject: [PATCH 033/243] Disable parallel replicas on shards with not enough
 nodes

---
 src/Interpreters/Context.cpp                  | 14 ++++---
 src/Interpreters/Context.h                    |  1 +
 src/Processors/QueryPlan/ReadFromRemote.cpp   | 41 ++++++++++++++-----
 ...el_replicas_cluster_all_replicas.reference |  2 +
 ...parallel_replicas_cluster_all_replicas.sql | 13 ++++++
 5 files changed, 55 insertions(+), 16 deletions(-)
 create mode 100644 tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.reference
 create mode 100644 tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 306db8ff491..ada77d821b0 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -4624,18 +4624,20 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const
     return SAMPLE_KEY;
 }
 
-bool Context::canUseParallelReplicasOnInitiator() const
+bool Context::canUseParallelReplicas() const
 {
     const auto & settings_ref = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
-        && !getClientInfo().collaborate_with_initiator;
+    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1;
+}
+
+bool Context::canUseParallelReplicasOnInitiator() const
+{
+    return canUseParallelReplicas() && !getClientInfo().collaborate_with_initiator;
 }
 
 bool Context::canUseParallelReplicasOnFollower() const
 {
-    const auto & settings_ref = getSettingsRef();
-    return getParallelReplicasMode() == ParallelReplicasMode::READ_TASKS && settings_ref.max_parallel_replicas > 1
-        && getClientInfo().collaborate_with_initiator;
+    return canUseParallelReplicas() && getClientInfo().collaborate_with_initiator;
 }
 
 void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache)
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 0eeea5b4fbd..1500c2f903c 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -1170,6 +1170,7 @@ public:
     WriteSettings getWriteSettings() const;
 
     /** There are multiple conditions that have to be met to be able to use parallel replicas */
+    bool canUseParallelReplicas() const;
     bool canUseParallelReplicasOnInitiator() const;
     bool canUseParallelReplicasOnFollower() const;
 
diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 78da19f48a0..5f156bc624c 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -236,24 +236,45 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
     scalars["_shard_num"]
         = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared<DataTypeUInt32>(), "_shard_num"}};
 
-    if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS)
+    ContextPtr execution_context = context;
+    if (context->canUseParallelReplicas())
     {
-        if (context->getSettingsRef().cluster_for_parallel_replicas.changed)
+        if (shard.shard_info.getAllNodeCount() > 1)
         {
-            const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas;
-            if (cluster_for_parallel_replicas != cluster_name)
-                LOG_INFO(log, "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is used: {}",
-                         cluster_for_parallel_replicas, cluster_name);
-        }
+            if (context->getSettingsRef().cluster_for_parallel_replicas.changed)
+            {
+                const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas;
+                if (cluster_for_parallel_replicas != cluster_name)
+                    LOG_INFO(
+                        log,
+                        "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is "
+                        "used: {}",
+                        cluster_for_parallel_replicas,
+                        cluster_name);
+            }
 
-        LOG_TRACE(&Poco::Logger::get("ReadFromRemote"), "Setting `cluster_for_parallel_replicas` to {}", cluster_name);
-        context->setSetting("cluster_for_parallel_replicas", cluster_name);
+            LOG_TRACE(&Poco::Logger::get("ReadFromRemote"), "Setting `cluster_for_parallel_replicas` to {}", cluster_name);
+            context->setSetting("cluster_for_parallel_replicas", cluster_name);
+        }
+        else
+        {
+            ContextMutablePtr tmp = Context::createCopy(context);
+            tmp->setSetting("allow_experimental_parallel_reading_from_replicas", Field{0});
+            execution_context = tmp;
+
+            LOG_TRACE(
+                &Poco::Logger::get("ReadFromRemote"),
+                "Parallel reading from replicas is disabled for shard. Not enough nodes: shard={} cluster={} nodes={}",
+                shard.shard_info.shard_num,
+                cluster_name,
+                shard.shard_info.getAllNodeCount());
+        }
     }
 
     std::shared_ptr<RemoteQueryExecutor> remote_query_executor;
 
     remote_query_executor = std::make_shared<RemoteQueryExecutor>(
-            shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage);
+        shard.shard_info.pool, query_string, output_stream->header, execution_context, throttler, scalars, external_tables, stage);
 
     remote_query_executor->setLogger(log);
 
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.reference b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.reference
new file mode 100644
index 00000000000..79ebceba739
--- /dev/null
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.reference
@@ -0,0 +1,2 @@
+60
+1
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
new file mode 100644
index 00000000000..82a17e9bd01
--- /dev/null
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
@@ -0,0 +1,13 @@
+DROP TABLE IF EXISTS tt;
+CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
+INSERT INTO tt SELECT * FROM numbers(10);
+
+SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1;
+SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_localhost', currentDatabase(), tt);
+
+SYSTEM FLUSH LOGS;
+
+SET allow_experimental_parallel_reading_from_replicas=0;
+SELECT count() > 0 FROM system.text_log WHERE event_time >= now() - INTERVAL 2 MINUTE AND message LIKE '%Parallel reading from replicas is disabled for shard. Not enough nodes%';
+
+DROP TABLE tt;

From 894024a83047da0daf9741e793f9734d2777b8f2 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Tue, 12 Sep 2023 13:54:26 +0200
Subject: [PATCH 034/243] Update test.py

---
 tests/integration/test_backup_log/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_backup_log/test.py b/tests/integration/test_backup_log/test.py
index deb1979b075..d0abd12f787 100644
--- a/tests/integration/test_backup_log/test.py
+++ b/tests/integration/test_backup_log/test.py
@@ -34,6 +34,7 @@ def restore_table(backup_name):
 
 def test_backup_log():
     instance.query("SYSTEM FLUSH LOGS")
+    instance.query("truncate table system.backup_log")
 
     backup_name = "File('/backups/test_backup/')"
     assert instance.query("SELECT * FROM system.tables WHERE name = 'backup_log'") == ""

From 1287f68745952aa84f19b58d24596b8d1c95f8b0 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Tue, 12 Sep 2023 12:52:29 +0000
Subject: [PATCH 035/243] Handle clusterAllReplicas/remote cases to avoid
 unnecessary logging

---
 .../ClusterProxy/executeQuery.cpp             | 37 ++++++++++++++++---
 src/Interpreters/ClusterProxy/executeQuery.h  |  2 +-
 src/Processors/QueryPlan/ReadFromRemote.cpp   |  2 +-
 src/Storages/getStructureOfRemoteTable.cpp    |  4 +-
 ...parallel_replicas_cluster_all_replicas.sql |  6 ++-
 5 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 34be2636917..372376afc02 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -34,7 +34,7 @@ namespace ErrorCodes
 namespace ClusterProxy
 {
 
-ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
     ContextPtr context,
     const Settings & settings,
     const StorageID & main_table,
@@ -47,6 +47,7 @@ ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
     /// If "secret" (in remote_servers) is not in use,
     /// user on the shard is not the same as the user on the initiator,
     /// hence per-user limits should not be applied.
+    const bool interserver_mode = !cluster.getSecret().empty();
     if (!interserver_mode)
     {
         /// Does not matter on remote servers, because queries are sent under different user.
@@ -123,6 +124,22 @@ ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
         new_settings.additional_table_filters.value.push_back(std::move(tuple));
     }
 
+    /// disable parallel replicas if cluster contains only shards with 1 replica
+    if (context->canUseParallelReplicas())
+    {
+        bool disable_parallel_replicas = false;
+        for (const auto & shard : cluster.getShardsInfo())
+        {
+            if (shard.getAllNodeCount() <= 1)
+            {
+                disable_parallel_replicas = true;
+                break;
+            }
+        }
+        if (disable_parallel_replicas)
+            new_settings.allow_experimental_parallel_reading_from_replicas = false;
+    }
+
     auto new_context = Context::createCopy(context);
     new_context->setSettings(new_settings);
     return new_context;
@@ -174,12 +191,22 @@ void executeQuery(
     std::vector<QueryPlanPtr> plans;
     SelectStreamFactory::Shards remote_shards;
 
-    auto new_context = updateSettingsForCluster(!not_optimized_cluster->getSecret().empty(), context, settings,
-                                                main_table, query_info.additional_filter_ast, log);
+    auto cluster = query_info.getCluster();
+    auto new_context
+        = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log);
+    if (new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
+        != context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value)
+    {
+        LOG_TRACE(
+            log,
+            "Parallel reading from replicas is disabled for cluster. There are no shards with more then 1 replica: cluster={}",
+            cluster->getName());
+    }
+
     new_context->increaseDistributedDepth();
 
-    size_t shards = query_info.getCluster()->getShardCount();
-    for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
+    size_t shards = cluster->getShardCount();
+    for (const auto & shard_info : cluster->getShardsInfo())
     {
         ASTPtr query_ast_for_shard = query_ast->clone();
         if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index b663dffb7fa..032458bea60 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -37,7 +37,7 @@ class SelectStreamFactory;
 ///   - optimize_skip_unused_shards_nesting
 ///
 /// @return new Context with adjusted settings
-ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
+ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
     ContextPtr context,
     const Settings & settings,
     const StorageID & main_table,
diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 5f156bc624c..884bc85aaaf 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -278,7 +278,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
 
     remote_query_executor->setLogger(log);
 
-    if (context->getParallelReplicasMode() == Context::ParallelReplicasMode::READ_TASKS)
+    if (execution_context->canUseParallelReplicas())
     {
         // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard:
         // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard.
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index ebd02f424fa..32266f20923 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -57,7 +57,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
     }
 
     ColumnsDescription res;
-    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);
 
     /// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
     /// since this is a service query and should not lead to query failure.
@@ -176,7 +176,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
     const auto & shards_info = cluster.getShardsInfo();
     auto query = "DESC TABLE " + remote_table_id.getFullTableName();
 
-    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
+    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id);
     new_context->setSetting("describe_extend_object_types", true);
 
     /// Expect only needed columns from the result of DESC TABLE.
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
index 82a17e9bd01..c1cbc404aa6 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
@@ -3,11 +3,13 @@ CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
 INSERT INTO tt SELECT * FROM numbers(10);
 
 SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1;
-SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_localhost', currentDatabase(), tt);
+SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_localhost', currentDatabase(), tt) settings log_comment='02875_190aed82-2423-413b-ad4c-24dcca50f65b';
 
 SYSTEM FLUSH LOGS;
 
 SET allow_experimental_parallel_reading_from_replicas=0;
-SELECT count() > 0 FROM system.text_log WHERE event_time >= now() - INTERVAL 2 MINUTE AND message LIKE '%Parallel reading from replicas is disabled for shard. Not enough nodes%';
+SELECT count() > 0 FROM system.text_log
+WHERE query_id in (select query_id from system.query_log where log_comment = '02875_190aed82-2423-413b-ad4c-24dcca50f65b')
+    AND message LIKE '%Parallel reading from replicas is disabled for cluster%';
 
 DROP TABLE tt;

From 0998b0bbc740f26fccd0d01f23014b40c75b6359 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <evillique@gmail.com>
Date: Wed, 13 Sep 2023 03:14:02 +0000
Subject: [PATCH 036/243] Fixes

---
 .../en/sql-reference/statements/alter/user.md |  1 +
 .../sql-reference/statements/create/user.md   |  2 +-
 .../integration/test_user_valid_until/test.py | 20 +++++++++++++++++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md
index 8785610f58a..fd7da05167c 100644
--- a/docs/en/sql-reference/statements/alter/user.md
+++ b/docs/en/sql-reference/statements/alter/user.md
@@ -14,6 +14,7 @@ ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
         [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
     [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
     [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
+    [VALID UNTIL datetime]
     [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
     [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
     [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...]
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index d5b429b7349..c41777130a3 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -165,7 +165,7 @@ ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technica
 
 ## VALID UNTIL Clause
 
-Allows you to specify the expiration date and, optionally, the time for a user. It accepts a string as a parameter. It is recommended to use the `YYYY-MM-DD [hh:mm:ss] [timezone]` format for datetime.
+Allows you to specify the expiration date and, optionally, the time for user credentials. It accepts a string as a parameter. It is recommended to use the `YYYY-MM-DD [hh:mm:ss] [timezone]` format for datetime. By default, this parameter equals `'infinity'`.
 
 Examples:
 
diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py
index 787250e6005..e34771e55a9 100644
--- a/tests/integration/test_user_valid_until/test.py
+++ b/tests/integration/test_user_valid_until/test.py
@@ -1,4 +1,6 @@
 import pytest
+from datetime import datetime, timedelta
+from time import sleep
 
 from helpers.cluster import ClickHouseCluster
 
@@ -32,7 +34,7 @@ def test_basic(started_cluster):
     )
     assert node.query("SELECT 1", user="user_basic") == "1\n"
 
-    # 3. With invalid VALID UNTIL
+    # 3. With expired VALID UNTIL
     node.query("ALTER USER user_basic VALID UNTIL '06/11/2010 08:03:20 Z+3'")
 
     assert (
@@ -48,6 +50,20 @@ def test_basic(started_cluster):
 
     assert node.query("SHOW CREATE USER user_basic") == "CREATE USER user_basic\n"
     assert node.query("SELECT 1", user="user_basic") == "1\n"
+    node.query("DROP USER user_basic")
+
+    # 5. Make VALID UNTIL expire
+    until_datetime = datetime.today() + timedelta(0, 10)
+    until_string = until_datetime.strftime("%Y-%m-%d %H:%M:%S")
+
+    node.query(f"CREATE USER user_basic VALID UNTIL '{until_string}'")
+
+    assert node.query("SELECT 1", user="user_basic") == "1\n"
+
+    sleep(12)
+
+    error = "Authentication failed"
+    assert error in node.query_and_get_error("SELECT 1", user="user_basic")
 
 
 def test_details(started_cluster):
@@ -64,5 +80,5 @@ def test_details(started_cluster):
 
     assert (
         node.query("SHOW CREATE USER user_details_time_only")
-        == "CREATE USER user_details_time_only VALID UNTIL \\'2000-01-01 22:03:40\\'\n"
+        == "CREATE USER user_details_time_only VALID UNTIL \\'2023-01-01 22:03:40\\'\n"
     )

From 09d4bf6930ddb59a1c0f1b703912deaf206da3bf Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Thu, 14 Sep 2023 08:51:47 +0000
Subject: [PATCH 037/243] add IO scheduling node for bandwidth throttling

---
 docs/en/operations/system-tables/scheduler.md |   8 +
 docs/en/operations/workload-scheduling.md     |   1 +
 src/IO/ISchedulerNode.h                       | 136 ++++++++++--
 src/IO/Resource/FairPolicy.h                  |   3 +-
 src/IO/Resource/PriorityPolicy.h              |   3 +-
 src/IO/Resource/SemaphoreConstraint.h         |   6 +-
 src/IO/Resource/ThrottlerConstraint.cpp       |  13 ++
 src/IO/Resource/ThrottlerConstraint.h         | 196 ++++++++++++++++++
 src/IO/SchedulerRoot.h                        |   4 +-
 .../System/StorageSystemScheduler.cpp         |  19 ++
 10 files changed, 368 insertions(+), 21 deletions(-)
 create mode 100644 src/IO/Resource/ThrottlerConstraint.cpp
 create mode 100644 src/IO/Resource/ThrottlerConstraint.h

diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md
index 168bfe708c4..ca2d4be9642 100644
--- a/docs/en/operations/system-tables/scheduler.md
+++ b/docs/en/operations/system-tables/scheduler.md
@@ -38,6 +38,10 @@ inflight_requests: ᴺᵁᴸᴸ
 inflight_cost:     ᴺᵁᴸᴸ
 max_requests:      ᴺᵁᴸᴸ
 max_cost:          ᴺᵁᴸᴸ
+max_burst:         ᴺᵁᴸᴸ
+max_speed:         ᴺᵁᴸᴸ
+throttling_us:     ᴺᵁᴸᴸ
+tokens:            ᴺᵁᴸᴸ
 ```
 
 Columns:
@@ -62,3 +66,7 @@ Columns:
 - `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
 - `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
 - `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
+- `max_burst` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for `tokens` available in token-bucket throttler.
+- `max_speed` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for bandwidth in tokens per second.
+- `throttling_us` (`Nullable(Int64)`) - For `bandwidth_limit` nodes only. Total number of microseconds this node was in throttling state.
+- `tokens` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Number of tokens currently available in token-bucket throttler.
diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md
index 6ed6ced41b4..e061a2ab413 100644
--- a/docs/en/operations/workload-scheduling.md
+++ b/docs/en/operations/workload-scheduling.md
@@ -79,6 +79,7 @@ graph TD
 
 **Possible node types:**
 * `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
+* `bandwidth_limit` (constraint) - blocks if burst exceeds `max_burst` (default 0) or current bandwidth exceeds `max_speed` (0 means unlimited); must have a single child.
 * `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
 * `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
 * `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h
index 5cf1ae94216..1dc19e5b18a 100644
--- a/src/IO/ISchedulerNode.h
+++ b/src/IO/ISchedulerNode.h
@@ -11,7 +11,10 @@
 
 #include <boost/noncopyable.hpp>
 
+#include <chrono>
 #include <deque>
+#include <queue>
+#include <algorithm>
 #include <functional>
 #include <memory>
 #include <mutex>
@@ -86,8 +89,64 @@ class EventQueue
 {
 public:
     using Event = std::function<void()>;
+    using TimePoint = std::chrono::system_clock::time_point;
+    static constexpr UInt64 not_postponed = 0;
 
-    void enqueue(Event&& event)
+    struct Postponed
+    {
+        TimePoint key;
+        UInt64 id; // for canceling
+        std::unique_ptr<Event> event;
+
+        Postponed(TimePoint key_, UInt64 id_, Event && event_)
+            : key(key_)
+            , id(id_)
+            , event(std::make_unique<Event>(std::move(event_)))
+        {}
+
+        bool operator<(const Postponed & rhs) const
+        {
+            return std::tie(key, id) > std::tie(rhs.key, rhs.id); // reversed for min-heap
+        }
+    };
+
+    /// Add an `event` to be processed after `until` time point.
+    /// Returns a unique id for canceling.
+    [[nodiscard]] UInt64 postpone(TimePoint until, Event && event)
+    {
+        std::unique_lock lock{mutex};
+        if (postponed.empty() || until < postponed.front().key)
+            pending.notify_one();
+        auto id = ++last_id;
+        postponed.emplace_back(until, id, std::move(event));
+        std::push_heap(postponed.begin(), postponed.end());
+        return id;
+    }
+
+    /// Cancel a postponed event using its unique id.
+    /// NOTE: Only postponed events can be canceled.
+    /// NOTE: If you need to cancel enqueued event, consider doing your actions inside another enqueued
+    /// NOTE: event instead. This ensures that all previous events are processed.
+    bool cancelPostponed(UInt64 postponed_id)
+    {
+        if (postponed_id == not_postponed)
+            return false;
+        std::unique_lock lock{mutex};
+        for (auto i = postponed.begin(), e = postponed.end(); i != e; ++i)
+        {
+            if (i->id == postponed_id)
+            {
+                postponed.erase(i);
+                // It is O(n), but we do not expect neither big heaps, nor frequent cancels. So it is fine.
+                std::make_heap(postponed.begin(), postponed.end());
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /// Add an `event` for immediate processing
+    void enqueue(Event && event)
     {
         std::unique_lock lock{mutex};
         bool was_empty = queue.empty();
@@ -97,34 +156,87 @@ public:
     }
 
     /// Process single event if it exists
+    /// Note that postponing constraint are ignored, use it to empty the queue including postponed events on shutdown
+    /// Returns `true` iff event has been processed
+    bool forceProcess()
+    {
+        std::unique_lock lock{mutex};
+        if (!queue.empty()) {
+            processQueue(lock);
+            return true;
+        }
+        if (!postponed.empty())
+        {
+            processPostponed(lock);
+            return true;
+        }
+        return false;
+    }
+
+    /// Process single event if it exists and meets postponing constraint
     /// Returns `true` iff event has been processed
     bool tryProcess()
     {
         std::unique_lock lock{mutex};
-        if (queue.empty())
+        if (!queue.empty()) {
+            processQueue(lock);
+            return true;
+        }
+        if (postponed.empty())
             return false;
-        Event event = std::move(queue.front());
-        queue.pop_front();
-        lock.unlock(); // do not hold queue mutext while processing events
-        event();
-        return true;
+        else
+        {
+            if (postponed.front().key <= std::chrono::system_clock::now())
+            {
+                processPostponed(lock);
+                return true;
+            }
+            return false;
+        }
     }
 
     /// Wait for single event (if not available) and process it
     void process()
     {
         std::unique_lock lock{mutex};
-        pending.wait(lock, [&] { return !queue.empty(); });
-        Event event = std::move(queue.front());
-        queue.pop_front();
-        lock.unlock(); // do not hold queue mutext while processing events
-        event();
+        while (true) {
+            if (!queue.empty()) {
+                return processQueue(lock);
+            }
+            if (postponed.empty())
+                pending.wait(lock);
+            else
+            {
+                if (postponed.front().key <= std::chrono::system_clock::now())
+                    return processPostponed(lock);
+                pending.wait_until(lock, postponed.front().key);
+            }
+        }
     }
 
 private:
+    void processQueue(std::unique_lock<std::mutex> & lock)
+    {
+        Event event = std::move(queue.front());
+        queue.pop_front();
+        lock.unlock(); // do not hold queue mutex while processing events
+        event();
+    }
+
+    void processPostponed(std::unique_lock<std::mutex> & lock)
+    {
+        Event event = std::move(*postponed.front().event);
+        std::pop_heap(postponed.begin(), postponed.end());
+        postponed.pop_back();
+        lock.unlock(); // do not hold queue mutex while processing events
+        event();
+    }
+
     std::mutex mutex;
     std::condition_variable pending;
     std::deque<Event> queue;
+    std::vector<Postponed> postponed;
+    UInt64 last_id = 0;
 };
 
 /*
diff --git a/src/IO/Resource/FairPolicy.h b/src/IO/Resource/FairPolicy.h
index 57b26344658..a8608b5bea3 100644
--- a/src/IO/Resource/FairPolicy.h
+++ b/src/IO/Resource/FairPolicy.h
@@ -1,7 +1,6 @@
 #pragma once
 
-#include <IO/ISchedulerQueue.h>
-#include <IO/SchedulerRoot.h>
+#include <IO/ISchedulerNode.h>
 
 #include <Common/Stopwatch.h>
 
diff --git a/src/IO/Resource/PriorityPolicy.h b/src/IO/Resource/PriorityPolicy.h
index 86d4fadb9dc..ee34c38f7e5 100644
--- a/src/IO/Resource/PriorityPolicy.h
+++ b/src/IO/Resource/PriorityPolicy.h
@@ -1,7 +1,6 @@
 #pragma once
 
-#include <IO/ISchedulerQueue.h>
-#include <IO/SchedulerRoot.h>
+#include <IO/ISchedulerNode.h>
 
 #include <algorithm>
 #include <unordered_map>
diff --git a/src/IO/Resource/SemaphoreConstraint.h b/src/IO/Resource/SemaphoreConstraint.h
index 9c6ce43d6ea..c4ab4ad59ae 100644
--- a/src/IO/Resource/SemaphoreConstraint.h
+++ b/src/IO/Resource/SemaphoreConstraint.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include <IO/ISchedulerConstraint.h>
-#include <IO/SchedulerRoot.h>
 
 #include <mutex>
 #include <limits>
@@ -154,14 +153,15 @@ private:
     }
 
 private:
+    const Int64 max_requests = default_max_requests;
+    const Int64 max_cost = default_max_cost;
+
     std::mutex mutex;
     Int64 requests = 0;
     Int64 cost = 0;
     bool child_active = false;
 
     SchedulerNodePtr child;
-    Int64 max_requests = default_max_requests;
-    Int64 max_cost = default_max_cost;
 };
 
 }
diff --git a/src/IO/Resource/ThrottlerConstraint.cpp b/src/IO/Resource/ThrottlerConstraint.cpp
new file mode 100644
index 00000000000..ca52d5f3e83
--- /dev/null
+++ b/src/IO/Resource/ThrottlerConstraint.cpp
@@ -0,0 +1,13 @@
+#include <IO/Resource/ThrottlerConstraint.h>
+
+#include <IO/SchedulerNodeFactory.h>
+
+namespace DB
+{
+
+void registerSemaphoreConstraint(SchedulerNodeFactory & factory)
+{
+    factory.registerMethod<ThrottlerConstraint>("bandwidth_limit");
+}
+
+}
diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
new file mode 100644
index 00000000000..97f544dc787
--- /dev/null
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -0,0 +1,196 @@
+#pragma once
+
+#include <IO/ISchedulerConstraint.h>
+#include <__chrono/duration.h>
+#include <__chrono/time_point.h>
+#include "IO/ISchedulerNode.h"
+
+#include <chrono>
+#include <mutex>
+#include <limits>
+#include <utility>
+
+namespace DB
+{
+
+/*
+ * Limited throughput constraint. Blocks if token-bucket constraint is violated:
+ * i.e. more than `max_burst + duration * max_speed` cost units (aka tokens) dequeued from this node in last `duration` seconds.
+ */
+class ThrottlerConstraint : public ISchedulerConstraint
+{
+public:
+    ThrottlerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
+        : ISchedulerConstraint(event_queue_, config, config_prefix)
+        , max_burst(config.getDouble(config_prefix + ".max_burst", 0))
+        , max_speed(config.getDouble(config_prefix + ".max_speed", 0))
+    {}
+
+    ~ThrottlerConstraint() override
+    {
+        // We should cancel event on destruction to avoid dangling references from event queue
+        event_queue->cancelPostponed(postponed);
+    }
+
+    bool equals(ISchedulerNode * other) override
+    {
+        if (!ISchedulerNode::equals(other))
+            return false;
+        if (auto * o = dynamic_cast<ThrottlerConstraint *>(other))
+            return max_burst == o->max_burst && max_speed == o->max_speed;
+        return false;
+    }
+
+    void attachChild(const std::shared_ptr<ISchedulerNode> & child_) override
+    {
+        // Take ownership
+        child = child_;
+        child->setParent(this);
+
+        // Activate if required
+        if (child->isActive())
+            activateChild(child.get());
+    }
+
+    void removeChild(ISchedulerNode * child_) override
+    {
+        if (child.get() == child_)
+        {
+            child_active = false; // deactivate
+            child->setParent(nullptr); // detach
+            child.reset();
+        }
+    }
+
+    ISchedulerNode * getChild(const String & child_name) override
+    {
+        if (child->basename == child_name)
+            return child.get();
+        else
+            return nullptr;
+    }
+
+    std::pair<ResourceRequest *, bool> dequeueRequest() override
+    {
+        // Dequeue request from the child
+        auto [request, child_now_active] = child->dequeueRequest();
+        if (!request)
+            return {nullptr, false};
+
+        // Request has reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
+        // The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
+        if (!request->constraint)
+            request->constraint = this;
+
+        updateBucket(request->cost);
+
+        child_active = child_now_active;
+        if (!active())
+            busy_periods++;
+        dequeued_requests++;
+        dequeued_cost += request->cost;
+        return {request, active()};
+    }
+
+    void finishRequest(ResourceRequest * request) override
+    {
+        // Recursive traverse of parent flow controls in reverse order
+        if (parent_constraint)
+            parent_constraint->finishRequest(request);
+
+        // NOTE: Token-bucket constraint does not require any action when consumption ends
+    }
+
+    void activateChild(ISchedulerNode * child_) override
+    {
+        if (child_ == child.get())
+            if (!std::exchange(child_active, true) && satisfied() && parent)
+                parent->activateChild(this);
+    }
+
+    bool isActive() override
+    {
+        return active();
+    }
+
+    size_t activeChildren() override
+    {
+        return child_active;
+    }
+
+    bool isSatisfied() override
+    {
+        return satisfied();
+    }
+
+    double getTokens() const
+    {
+        return tokens;
+    }
+
+    std::chrono::nanoseconds getThrottlingDuration() const
+    {
+        return throttling_duration;
+    }
+
+    std::pair<double, double> getParams() const
+    {
+        return {max_burst, max_speed};
+    }
+
+private:
+    void onPostponed()
+    {
+        postponed = EventQueue::not_postponed;
+        bool was_active = active();
+        updateBucket();
+        if (!was_active && active())
+            parent->activateChild(this);
+    }
+
+    void updateBucket(ResourceCost use = 0)
+    {
+        auto now = std::chrono::system_clock::now();
+        if (max_speed > 0.0) {
+            double elapsed = std::chrono::nanoseconds(now - last_update).count() / 1e9;
+            tokens = std::min(tokens + max_speed * elapsed - use, max_burst);
+
+            // Postpone activation until there is positive amount of tokens
+            if (tokens < 0.0)
+            {
+                auto delay_ns = std::chrono::nanoseconds(static_cast<Int64>(-tokens / max_speed * 1e9));
+                if (postponed == EventQueue::not_postponed)
+                {
+                    postponed = event_queue->postpone(std::chrono::time_point_cast<std::chrono::microseconds>(now + delay_ns), [this] { onPostponed(); });
+                    throttling_duration += delay_ns;
+                }
+            }
+        }
+        last_update = now;
+    }
+
+    bool satisfied() const
+    {
+        return tokens >= 0.0;
+    }
+
+    bool active() const
+    {
+        return satisfied() && child_active;
+    }
+
+private:
+    const double max_burst{0}; /// in tokens
+    const double max_speed{0}; /// in tokens per second
+
+    EventQueue::TimePoint last_update;
+    UInt64 postponed = EventQueue::not_postponed;
+    double tokens; /// in ResourceCost units
+    bool child_active = false;
+
+    std::chrono::nanoseconds throttling_duration;
+
+    SchedulerNodePtr child;
+};
+
+}
diff --git a/src/IO/SchedulerRoot.h b/src/IO/SchedulerRoot.h
index 748632615bc..7ce279f9ee6 100644
--- a/src/IO/SchedulerRoot.h
+++ b/src/IO/SchedulerRoot.h
@@ -79,7 +79,7 @@ public:
             scheduler.join();
             if (graceful)
             {
-                // Do the same cycle as schedulerThread() but never block, just exit instead
+                // Do the same cycle as schedulerThread() but never block or wait postponed events, just proces
                 bool has_work = true;
                 while (has_work)
                 {
@@ -88,7 +88,7 @@ public:
                         execute(request);
                     else
                         has_work = false;
-                    while (events.tryProcess())
+                    while (events.forceProcess())
                         has_work = true;
                 }
             }
diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp
index 9f4307fca3a..9eb4ce82c54 100644
--- a/src/Storages/System/StorageSystemScheduler.cpp
+++ b/src/Storages/System/StorageSystemScheduler.cpp
@@ -9,6 +9,7 @@
 #include <IO/Resource/FairPolicy.h>
 #include <IO/Resource/PriorityPolicy.h>
 #include <IO/Resource/SemaphoreConstraint.h>
+#include <IO/Resource/ThrottlerConstraint.h>
 #include <IO/Resource/FifoQueue.h>
 #include <Interpreters/Context.h>
 #include "IO/ResourceRequest.h"
@@ -40,6 +41,10 @@ NamesAndTypesList StorageSystemScheduler::getNamesAndTypes()
         {"inflight_cost", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
         {"max_requests", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
         {"max_cost", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
+        {"max_burst", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
+        {"max_speed", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
+        {"throttling_us", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
+        {"tokens", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
     };
     return names_and_types;
 }
@@ -71,6 +76,10 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
         Field inflight_cost;
         Field max_requests;
         Field max_cost;
+        Field max_burst;
+        Field max_speed;
+        Field throttling_us;
+        Field tokens;
 
         if (auto * parent = dynamic_cast<FairPolicy *>(node->parent))
         {
@@ -90,6 +99,12 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
             std::tie(inflight_requests, inflight_cost) = ptr->getInflights();
             std::tie(max_requests, max_cost) = ptr->getLimits();
         }
+        if (auto * ptr = dynamic_cast<ThrottlerConstraint *>(node.get()))
+        {
+            std::tie(max_burst, max_speed) = ptr->getParams();
+            throttling_us = ptr->getThrottlingDuration().count() / 1000;
+            tokens = ptr->getTokens();
+        }
 
         res_columns[i++]->insert(vruntime);
         res_columns[i++]->insert(system_vruntime);
@@ -101,6 +116,10 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
         res_columns[i++]->insert(inflight_cost);
         res_columns[i++]->insert(max_requests);
         res_columns[i++]->insert(max_cost);
+        res_columns[i++]->insert(max_burst);
+        res_columns[i++]->insert(max_speed);
+        res_columns[i++]->insert(throttling_us);
+        res_columns[i++]->insert(tokens);
     });
 }
 

From f3e6ba9bd1f389ec4efe6d43717b5ee763c7df76 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Thu, 14 Sep 2023 09:17:55 +0000
Subject: [PATCH 038/243] fix style

---
 src/IO/ISchedulerNode.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h
index 1dc19e5b18a..57d7b1c70e8 100644
--- a/src/IO/ISchedulerNode.h
+++ b/src/IO/ISchedulerNode.h
@@ -161,7 +161,8 @@ public:
     bool forceProcess()
     {
         std::unique_lock lock{mutex};
-        if (!queue.empty()) {
+        if (!queue.empty())
+        {
             processQueue(lock);
             return true;
         }
@@ -178,7 +179,8 @@ public:
     bool tryProcess()
     {
         std::unique_lock lock{mutex};
-        if (!queue.empty()) {
+        if (!queue.empty())
+        {
             processQueue(lock);
             return true;
         }
@@ -199,10 +201,10 @@ public:
     void process()
     {
         std::unique_lock lock{mutex};
-        while (true) {
-            if (!queue.empty()) {
+        while (true)
+        {
+            if (!queue.empty())
                 return processQueue(lock);
-            }
             if (postponed.empty())
                 pending.wait(lock);
             else

From 2ab922777aff0010f0a89024f4b4847cd2b1b88a Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Thu, 14 Sep 2023 09:21:23 +0000
Subject: [PATCH 039/243] fix typos

---
 src/IO/SchedulerRoot.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/SchedulerRoot.h b/src/IO/SchedulerRoot.h
index 7ce279f9ee6..68aa14a289d 100644
--- a/src/IO/SchedulerRoot.h
+++ b/src/IO/SchedulerRoot.h
@@ -79,7 +79,7 @@ public:
             scheduler.join();
             if (graceful)
             {
-                // Do the same cycle as schedulerThread() but never block or wait postponed events, just proces
+                // Do the same cycle as schedulerThread() but never block or wait postponed events
                 bool has_work = true;
                 while (has_work)
                 {

From 8b00e29ce9eac8e6b1eb26559aa625a1887b5f5e Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Thu, 14 Sep 2023 10:40:41 +0000
Subject: [PATCH 040/243] fix style

---
 src/IO/Resource/ThrottlerConstraint.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
index 97f544dc787..48ae60228b0 100644
--- a/src/IO/Resource/ThrottlerConstraint.h
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -151,7 +151,8 @@ private:
     void updateBucket(ResourceCost use = 0)
     {
         auto now = std::chrono::system_clock::now();
-        if (max_speed > 0.0) {
+        if (max_speed > 0.0)
+        {
             double elapsed = std::chrono::nanoseconds(now - last_update).count() / 1e9;
             tokens = std::min(tokens + max_speed * elapsed - use, max_burst);
 
@@ -161,7 +162,8 @@ private:
                 auto delay_ns = std::chrono::nanoseconds(static_cast<Int64>(-tokens / max_speed * 1e9));
                 if (postponed == EventQueue::not_postponed)
                 {
-                    postponed = event_queue->postpone(std::chrono::time_point_cast<std::chrono::microseconds>(now + delay_ns), [this] { onPostponed(); });
+                    postponed = event_queue->postpone(std::chrono::time_point_cast<std::chrono::system_clock::duration>(now + delay_ns),
+                        [this] { onPostponed(); });
                     throttling_duration += delay_ns;
                 }
             }

From f427ca771e60953209918eb3fe009c54f50d8444 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Thu, 14 Sep 2023 12:41:43 +0200
Subject: [PATCH 041/243] Fix optimize read in order for views.

---
 src/Functions/materialize.h                   |  9 ++++++++
 ...optimize_read_in_order_from_view.reference | 21 +++++++++++++++++++
 ...02877_optimize_read_in_order_from_view.sql | 21 +++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
 create mode 100644 tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql

diff --git a/src/Functions/materialize.h b/src/Functions/materialize.h
index aab4e5bdbdf..73bfdec48ab 100644
--- a/src/Functions/materialize.h
+++ b/src/Functions/materialize.h
@@ -52,6 +52,15 @@ public:
     {
         return arguments[0].column->convertToFullColumnIfConst();
     }
+
+    bool hasInformationAboutMonotonicity() const override { return true; }
+
+    Monotonicity getMonotonicityForRange(const IDataType &, const Field &, const Field &) const override
+    {
+        /// Depending on the argument the function materialize() is either a constant or works as identity().
+        /// In both cases this function is monotonic and non-decreasing.
+        return {.is_monotonic = true, .is_always_monotonic = true};
+    }
 };
 
 }
diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
new file mode 100644
index 00000000000..05893173f2b
--- /dev/null
+++ b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
@@ -0,0 +1,21 @@
+300
+299
+298
+297
+296
+295
+294
+293
+292
+291
+290
+289
+288
+287
+286
+285
+284
+283
+282
+281
+read_rows=ok
diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql
new file mode 100644
index 00000000000..3397681d0d9
--- /dev/null
+++ b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql
@@ -0,0 +1,21 @@
+SET optimize_read_in_order=1;
+
+DROP TABLE IF EXISTS view1;
+DROP TABLE IF EXISTS table1;
+
+CREATE TABLE table1 (number UInt64) ENGINE=MergeTree ORDER BY number SETTINGS index_granularity=1;
+INSERT INTO table1 SELECT number FROM numbers(1, 300);
+
+CREATE VIEW view1 AS SELECT number FROM table1;
+
+-- The following SELECT is expected to read 20 rows. In fact it may decide to read more than 20 rows, but not too many anyway.
+-- So we'll check that the number of read rows is less than 40.
+
+SELECT /* test 02877, query 1 */ * FROM (SELECT * FROM view1) ORDER BY number DESC LIMIT 20 SETTINGS log_queries=1;
+
+SYSTEM FLUSH LOGS;
+
+SELECT concat('read_rows=', if(read_rows<40, 'ok', toString(read_rows))) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%test 02877, query 1%' AND type='QueryFinish';
+
+DROP TABLE view1;
+DROP TABLE table1;

From 1eec898274d2a01538111e47106674f4b1ece108 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Thu, 14 Sep 2023 14:36:09 +0200
Subject: [PATCH 042/243] Add test.

---
 .../02876_sort_union_of_sorted.reference      | 63 +++++++++++++++++++
 .../02876_sort_union_of_sorted.sql            | 20 ++++++
 2 files changed, 83 insertions(+)
 create mode 100644 tests/queries/0_stateless/02876_sort_union_of_sorted.reference
 create mode 100644 tests/queries/0_stateless/02876_sort_union_of_sorted.sql

diff --git a/tests/queries/0_stateless/02876_sort_union_of_sorted.reference b/tests/queries/0_stateless/02876_sort_union_of_sorted.reference
new file mode 100644
index 00000000000..f3af221a036
--- /dev/null
+++ b/tests/queries/0_stateless/02876_sort_union_of_sorted.reference
@@ -0,0 +1,63 @@
+1..20:
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+20..1:
+20
+19
+18
+17
+16
+15
+14
+13
+12
+11
+10
+9
+8
+7
+6
+5
+4
+3
+2
+1
+20..1:
+20
+19
+18
+17
+16
+15
+14
+13
+12
+11
+10
+9
+8
+7
+6
+5
+4
+3
+2
+1
diff --git a/tests/queries/0_stateless/02876_sort_union_of_sorted.sql b/tests/queries/0_stateless/02876_sort_union_of_sorted.sql
new file mode 100644
index 00000000000..23d3772bc82
--- /dev/null
+++ b/tests/queries/0_stateless/02876_sort_union_of_sorted.sql
@@ -0,0 +1,20 @@
+DROP TABLE IF EXISTS table1;
+DROP TABLE IF EXISTS table2;
+
+CREATE TABLE table1 (number UInt64) ENGINE=MergeTree ORDER BY tuple();
+CREATE TABLE table2 (number UInt64) ENGINE=MergeTree ORDER BY tuple();
+
+INSERT INTO table1 SELECT number FROM numbers_mt(1, 10);
+INSERT INTO table2 SELECT number FROM numbers_mt(11, 10);
+
+SELECT '1..20:';
+SELECT * FROM ((SELECT * FROM table1 ORDER BY number) UNION ALL (SELECT * FROM table2 ORDER BY number)) ORDER BY number;
+
+SELECT '20..1:';
+SELECT * FROM ((SELECT * FROM table1 ORDER BY number) UNION ALL (SELECT * FROM table2 ORDER BY number)) ORDER BY number DESC;
+
+SELECT '20..1:';
+SELECT * FROM ((SELECT * FROM table1 ORDER BY number DESC) UNION ALL (SELECT * FROM table2 ORDER BY number DESC)) ORDER BY number DESC;
+
+DROP TABLE table1;
+DROP TABLE table2;

From 9a0e1ef5929ca0c734af14999c5c7db7d25d5f03 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Thu, 14 Sep 2023 14:36:41 +0200
Subject: [PATCH 043/243] Fix sorting of UNION ALL of already sorted results.

---
 src/Processors/QueryPlan/IQueryPlanStep.h    | 28 ++++++++++++++++++++
 src/Processors/QueryPlan/ITransformingStep.h | 15 ++---------
 src/Processors/QueryPlan/JoinStep.cpp        | 22 ++++-----------
 src/Processors/QueryPlan/JoinStep.h          |  4 ++-
 src/Processors/QueryPlan/QueryPlan.cpp       | 20 +++++++++-----
 src/Processors/QueryPlan/UnionStep.cpp       | 10 +++----
 src/Processors/QueryPlan/UnionStep.h         |  4 ++-
 7 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h
index a608c6f8058..df5c13a7f3b 100644
--- a/src/Processors/QueryPlan/IQueryPlanStep.h
+++ b/src/Processors/QueryPlan/IQueryPlanStep.h
@@ -16,6 +16,11 @@ using Processors = std::vector<ProcessorPtr>;
 
 namespace JSONBuilder { class JSONMap; }
 
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
 /// Description of data stream.
 /// Single logical data stream may relate to many ports of pipeline.
 class DataStream
@@ -107,7 +112,30 @@ public:
     /// Append extra processors for this step.
     void appendExtraProcessors(const Processors & extra_processors);
 
+    /// Updates the input streams of the given step. Used during query plan optimizations.
+    /// It won't do any validation of new streams, so it is your responsibility to ensure that this update doesn't break anything
+    /// (e.g. you update data stream traits or correctly remove / add columns).
+    void updateInputStreams(DataStreams input_streams_)
+    {
+        chassert(canUpdateInputStream());
+        input_streams = std::move(input_streams_);
+        updateOutputStream();
+    }
+
+    void updateInputStream(DataStream input_stream) { updateInputStreams(DataStreams{input_stream}); }
+
+    void updateInputStream(DataStream input_stream, size_t idx)
+    {
+        chassert(canUpdateInputStream() && idx < input_streams.size());
+        input_streams[idx] = input_stream;
+        updateOutputStream();
+    }
+
+    virtual bool canUpdateInputStream() const { return false; }
+
 protected:
+    virtual void updateOutputStream() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); }
+
     DataStreams input_streams;
     std::optional<DataStream> output_stream;
 
diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h
index 77de668fbdb..32bf3b6af90 100644
--- a/src/Processors/QueryPlan/ITransformingStep.h
+++ b/src/Processors/QueryPlan/ITransformingStep.h
@@ -55,17 +55,6 @@ public:
     const TransformTraits & getTransformTraits() const { return transform_traits; }
     const DataStreamTraits & getDataStreamTraits() const { return data_stream_traits; }
 
-    /// Updates the input stream of the given step. Used during query plan optimizations.
-    /// It won't do any validation of a new stream, so it is your responsibility to ensure that this update doesn't break anything
-    /// (e.g. you update data stream traits or correctly remove / add columns).
-    void updateInputStream(DataStream input_stream)
-    {
-        input_streams.clear();
-        input_streams.emplace_back(std::move(input_stream));
-
-        updateOutputStream();
-    }
-
     void describePipeline(FormatSettings & settings) const override;
 
     /// Enforcement is supposed to be done through the special settings that will be taken into account by remote nodes during query planning (e.g. force_aggregation_in_order).
@@ -75,6 +64,8 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
     }
 
+    bool canUpdateInputStream() const override { return true; }
+
 protected:
     /// Create output stream from header and traits.
     static DataStream createOutputStream(
@@ -85,8 +76,6 @@ protected:
     TransformTraits transform_traits;
 
 private:
-    virtual void updateOutputStream() = 0;
-
     /// If we should collect processors got after pipeline transformation.
     bool collect_processors;
 
diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp
index 63a5eeb51d2..9ac0f18d2c3 100644
--- a/src/Processors/QueryPlan/JoinStep.cpp
+++ b/src/Processors/QueryPlan/JoinStep.cpp
@@ -24,11 +24,7 @@ JoinStep::JoinStep(
     bool keep_left_read_in_order_)
     : join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
 {
-    input_streams = {left_stream_, right_stream_};
-    output_stream = DataStream
-    {
-        .header = JoiningTransform::transformHeader(left_stream_.header, join),
-    };
+    updateInputStreams(DataStreams{left_stream_, right_stream_});
 }
 
 QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &)
@@ -95,20 +91,12 @@ void JoinStep::describeActions(JSONBuilder::JSONMap & map) const
         map.add("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/));
 }
 
-void JoinStep::updateInputStream(const DataStream & new_input_stream_, size_t idx)
+void JoinStep::updateOutputStream()
 {
-    if (idx == 0)
+    output_stream = DataStream
     {
-        input_streams = {new_input_stream_, input_streams.at(1)};
-        output_stream = DataStream
-        {
-            .header = JoiningTransform::transformHeader(new_input_stream_.header, join),
-        };
-    }
-    else
-    {
-        input_streams = {input_streams.at(0), new_input_stream_};
-    }
+        .header = JoiningTransform::transformHeader(input_streams[0].header, join),
+    };
 }
 
 static ITransformingStep::Traits getStorageJoinTraits()
diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h
index 369ee9bec8b..5bfaa41f9b6 100644
--- a/src/Processors/QueryPlan/JoinStep.h
+++ b/src/Processors/QueryPlan/JoinStep.h
@@ -33,9 +33,11 @@ public:
     const JoinPtr & getJoin() const { return join; }
     bool allowPushDownToRight() const;
 
-    void updateInputStream(const DataStream & new_input_stream_, size_t idx);
+    bool canUpdateInputStream() const override { return true; }
 
 private:
+    void updateOutputStream() override;
+
     JoinPtr join;
     size_t max_block_size;
     size_t max_streams;
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index 8054209c1c3..2d2dc66a8c9 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -455,16 +455,24 @@ static void updateDataStreams(QueryPlan::Node & root)
 
         static bool visitTopDownImpl(QueryPlan::Node * /*current_node*/, QueryPlan::Node * /*parent_node*/) { return true; }
 
-        static void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node)
+        static void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node * /*parent_node*/)
         {
-            if (!parent_node || parent_node->children.size() != 1)
+            auto & current_step = *current_node->step;
+            if (!current_step.canUpdateInputStream() || current_node->children.empty())
                 return;
 
-            if (!current_node->step->hasOutputStream())
-                return;
+            for (const auto * child : current_node->children)
+            {
+                if (!child->step->hasOutputStream())
+                    return;
+            }
 
-            if (auto * parent_transform_step = dynamic_cast<ITransformingStep *>(parent_node->step.get()); parent_transform_step)
-                parent_transform_step->updateInputStream(current_node->step->getOutputStream());
+            DataStreams streams;
+            streams.reserve(current_node->children.size());
+            for (const auto * child : current_node->children)
+                streams.emplace_back(child->step->getOutputStream());
+
+            current_step.updateInputStreams(std::move(streams));
         }
     };
 
diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp
index e111890a833..dde12271de1 100644
--- a/src/Processors/QueryPlan/UnionStep.cpp
+++ b/src/Processors/QueryPlan/UnionStep.cpp
@@ -30,18 +30,16 @@ UnionStep::UnionStep(DataStreams input_streams_, size_t max_threads_)
     : header(checkHeaders(input_streams_))
     , max_threads(max_threads_)
 {
-    input_streams = std::move(input_streams_);
+    updateInputStreams(std::move(input_streams_));
+}
 
+void UnionStep::updateOutputStream()
+{
     if (input_streams.size() == 1)
         output_stream = input_streams.front();
     else
         output_stream = DataStream{.header = header};
 
-    updateOutputSortDescription();
-}
-
-void UnionStep::updateOutputSortDescription()
-{
     SortDescription common_sort_description = input_streams.front().sort_description;
     DataStream::SortScope sort_scope = input_streams.front().sort_scope;
     for (const auto & input_stream : input_streams)
diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h
index 6278de07673..4ab08785b01 100644
--- a/src/Processors/QueryPlan/UnionStep.h
+++ b/src/Processors/QueryPlan/UnionStep.h
@@ -19,9 +19,11 @@ public:
 
     size_t getMaxThreads() const { return max_threads; }
 
-    void updateOutputSortDescription();
+    bool canUpdateInputStream() const override { return true; }
 
 private:
+    void updateOutputStream() override;
+
     Block header;
     size_t max_threads;
 };

From ca57eb95466b3532bae295fda9e42004d9e88301 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Thu, 14 Sep 2023 15:55:37 +0200
Subject: [PATCH 044/243] Fix addData function

---
 src/Functions/FunctionsOpDate.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/FunctionsOpDate.cpp b/src/Functions/FunctionsOpDate.cpp
index 0d8ca2b58cc..dcbc1fc1256 100644
--- a/src/Functions/FunctionsOpDate.cpp
+++ b/src/Functions/FunctionsOpDate.cpp
@@ -53,7 +53,6 @@ public:
     }
 
     bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 2}; }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
     {

From 7643e216bead8c98d742427869cd121655cbdd28 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Thu, 14 Sep 2023 15:58:00 +0200
Subject: [PATCH 045/243] Better test

---
 tests/queries/0_stateless/02834_add_sub_date_functions.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02834_add_sub_date_functions.sql b/tests/queries/0_stateless/02834_add_sub_date_functions.sql
index 44d9bb8a2aa..9c77ba7e378 100644
--- a/tests/queries/0_stateless/02834_add_sub_date_functions.sql
+++ b/tests/queries/0_stateless/02834_add_sub_date_functions.sql
@@ -1,6 +1,6 @@
 SET session_timezone = 'UTC';
 
-SELECT ADDDATE('2022-05-07'::Date, INTERVAL 5 MINUTE);
+SELECT ADDDATE(materialize('2022-05-07'::Date), INTERVAL 5 MINUTE);
 
 SELECT addDate('2022-05-07'::Date, INTERVAL 5 MINUTE);
 SELECT addDate('2022-05-07'::Date32, INTERVAL 5 MINUTE);
@@ -14,7 +14,7 @@ SELECT addDate('1234', INTERVAL 5 MINUTE);  -- { serverError ILLEGAL_TYPE_OF_ARG
 
 SELECT '---';
 
-SELECT SUBDATE('2022-05-07'::Date, INTERVAL 5 MINUTE);
+SELECT SUBDATE(materialize('2022-05-07'::Date), INTERVAL 5 MINUTE);
 
 SELECT subDate('2022-05-07'::Date, INTERVAL 5 MINUTE);
 SELECT subDate('2022-05-07'::Date32, INTERVAL 5 MINUTE);

From 52de64c2bd019ae9c06a6a3aa1c770f4dcd0946c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 14 Sep 2023 18:30:12 +0200
Subject: [PATCH 046/243] remove unneeded code

---
 src/Storages/StorageReplicatedMergeTree.cpp | 22 ++++++++-------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 7c7e6dbd42c..e56055a717a 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -9033,7 +9033,7 @@ namespace
 /// But sometimes we need an opposite. When we deleting all_0_0_0_1 it can be non replicated to other replicas, so we are the only owner of this part.
 /// In this case when we will drop all_0_0_0_1 we will drop blobs for all_0_0_0. But it will lead to dataloss. For such case we need to check that other replicas
 /// still need parent part.
-std::pair<bool, std::optional<NameSet>> getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, Poco::Logger * log)
+std::pair<bool, NameSet> getParentLockedBlobs(const ZooKeeperWithFaultInjectionPtr & zookeeper_ptr, const std::string & zero_copy_part_path_prefix, const MergeTreePartInfo & part_info, MergeTreeDataFormatVersion format_version, Poco::Logger * log)
 {
     NameSet files_not_to_remove;
 
@@ -9078,8 +9078,9 @@ std::pair<bool, std::optional<NameSet>> getParentLockedBlobs(const ZooKeeperWith
             zookeeper_ptr->tryGet(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, files_not_to_remove_str, nullptr, nullptr, &code);
             if (code != Coordination::Error::ZOK)
             {
-                LOG_TRACE(log, "Cannot get parent files from ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), code);
-                return {true, std::nullopt};
+                LOG_INFO(log, "Cannot get parent files from ZooKeeper on path ({}), error {}, assuming the parent was removed concurrently",
+                            (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), code);
+                continue;
             }
 
             if (!files_not_to_remove_str.empty())
@@ -9093,8 +9094,9 @@ std::pair<bool, std::optional<NameSet>> getParentLockedBlobs(const ZooKeeperWith
                 code = zookeeper_ptr->tryGetChildren(fs::path(zero_copy_part_path_prefix) / part_candidate_info_str, children);
                 if (code != Coordination::Error::ZOK)
                 {
-                    LOG_TRACE(log, "Cannot get parent locks in ZooKeeper on path ({}), error {}", (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code));
-                    return {true, std::nullopt};
+                    LOG_INFO(log, "Cannot get parent locks in ZooKeeper on path ({}), error {}, assuming the parent was removed concurrently",
+                              (fs::path(zero_copy_part_path_prefix) / part_candidate_info_str).string(), errorMessage(code));
+                    continue;
                 }
 
                 if (children.size() > 1 || (children.size() == 1 && children[0] != ZeroCopyLock::ZERO_COPY_LOCK_NAME))
@@ -9150,15 +9152,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
         auto [has_parent, parent_not_to_remove] = getParentLockedBlobs(
             zookeeper_ptr, fs::path(zc_zookeeper_path).parent_path(), part_info, data_format_version, logger);
 
-        // parent_not_to_remove == std::nullopt means that we were unable to retrieve parts set
-        if (has_parent && parent_not_to_remove == std::nullopt)
-        {
-            LOG_TRACE(logger, "Failed to get mutation parent on {} for part {}, refusing to remove blobs", zookeeper_part_replica_node, part_name);
-            return {false, {}};
-        }
-
-        files_not_to_remove.insert(parent_not_to_remove->begin(), parent_not_to_remove->end());
-
+        files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end());
 
         LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name);
 

From c422a8f0dc14269041aba360ea0ac645e2a635e1 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 13 Sep 2023 13:41:02 +0000
Subject: [PATCH 047/243] Cosmetics

---
 docs/en/sql-reference/data-types/array.md     |  2 +-
 .../MergeTree/MergeTreeIndexAnnoy.cpp         | 26 +++++++++----------
 .../MergeTree/MergeTreeIndexUSearch.cpp       | 24 ++++++++---------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md
index 20ce7d2ed52..0ee7c8de93c 100644
--- a/docs/en/sql-reference/data-types/array.md
+++ b/docs/en/sql-reference/data-types/array.md
@@ -4,7 +4,7 @@ sidebar_position: 52
 sidebar_label: Array(T)
 ---
 
-# Array(t)
+# Array(T)
 
 An array of `T`-type items, with the starting array index as 1. `T` can be any data type, including an array.
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index 3ad4f81716e..15830513162 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -154,36 +154,36 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
 
     if (const auto & column_array = typeid_cast<const ColumnArray *>(column_cut.get()))
     {
-        const auto & data = column_array->getData();
-        const auto & array = typeid_cast<const ColumnFloat32 &>(data).getData();
+        const auto & column_array_data = column_array->getData();
+        const auto & column_arary_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
 
-        if (array.empty())
+        if (column_arary_data_float_data.empty())
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read);
 
-        const auto & offsets = column_array->getOffsets();
-        const size_t num_rows = offsets.size();
+        const auto & column_array_offsets = column_array->getOffsets();
+        const size_t num_rows = column_array_offsets.size();
 
         /// Check all sizes are the same
-        size_t size = offsets[0];
+        size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
-            if (offsets[i + 1] - offsets[i] != size)
+            if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
                 throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
 
         if (!index)
-            index = std::make_shared<AnnoyIndexWithSerialization<Distance>>(size);
+            index = std::make_shared<AnnoyIndexWithSerialization<Distance>>(dimension);
 
         /// Add all rows of block
-        index->add_item(index->get_n_items(), array.data());
+        index->add_item(index->get_n_items(), column_arary_data_float_data.data());
         for (size_t current_row = 1; current_row < num_rows; ++current_row)
-            index->add_item(index->get_n_items(), &array[offsets[current_row - 1]]);
+            index->add_item(index->get_n_items(), &column_arary_data_float_data[column_array_offsets[current_row - 1]]);
     }
     else if (const auto & column_tuple = typeid_cast<const ColumnTuple *>(column_cut.get()))
     {
-        const auto & columns = column_tuple->getColumns();
+        const auto & column_tuple_columns = column_tuple->getColumns();
 
         /// TODO check if calling index->add_item() directly on the block's tuples is faster than materializing everything
-        std::vector<std::vector<Float32>> data{column_tuple->size(), std::vector<Float32>()};
-        for (const auto & column : columns)
+        std::vector<std::vector<Float32>> data(column_tuple->size(), std::vector<Float32>());
+        for (const auto & column : column_tuple_columns)
         {
             const auto & pod_array = typeid_cast<const ColumnFloat32 *>(column.get())->getData();
             for (size_t i = 0; i < pod_array.size(); ++i)
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index 1ab85e6bbaf..de556eb7e07 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -173,23 +173,23 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
 
     if (const auto & column_array = typeid_cast<const ColumnArray *>(column_cut.get()))
     {
-        const auto & data = column_array->getData();
-        const auto & array = typeid_cast<const ColumnFloat32 &>(data).getData();
+        const auto & column_array_data = column_array->getData();
+        const auto & column_array_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
 
-        if (array.empty())
+        if (column_array_data_float_data.empty())
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read);
 
-        const auto & offsets = column_array->getOffsets();
-        const size_t num_rows = offsets.size();
+        const auto & column_array_offsets = column_array->getOffsets();
+        const size_t num_rows = column_array_offsets.size();
 
         /// Check all sizes are the same
-        size_t size = offsets[0];
+        size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
-            if (offsets[i + 1] - offsets[i] != size)
+            if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
                 throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
 
         if (!index)
-            index = std::make_shared<USearchIndexWithSerialization<Metric>>(size, scalar_kind);
+            index = std::make_shared<USearchIndexWithSerialization<Metric>>(dimension, scalar_kind);
 
         /// Add all rows of block
         if (!index->reserve(unum::usearch::ceil2(index->size() + num_rows)))
@@ -197,7 +197,7 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
 
         for (size_t current_row = 0; current_row < num_rows; ++current_row)
         {
-            auto rc = index->add(static_cast<uint32_t>(index->size()), &array[offsets[current_row - 1]]);
+            auto rc = index->add(static_cast<uint32_t>(index->size()), &column_array_data_float_data[column_array_offsets[current_row - 1]]);
             if (!rc)
                 throw Exception(ErrorCodes::INCORRECT_DATA, rc.error.release());
 
@@ -208,9 +208,9 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
     }
     else if (const auto & column_tuple = typeid_cast<const ColumnTuple *>(column_cut.get()))
     {
-        const auto & columns = column_tuple->getColumns();
-        std::vector<std::vector<Float32>> data{column_tuple->size(), std::vector<Float32>()};
-        for (const auto & column : columns)
+        const auto & column_tuple_columns = column_tuple->getColumns();
+        std::vector<std::vector<Float32>> data(column_tuple->size(), std::vector<Float32>());
+        for (const auto & column : column_tuple_columns)
         {
             const auto & pod_array = typeid_cast<const ColumnFloat32 *>(column.get())->getData();
             for (size_t i = 0; i < pod_array.size(); ++i)

From 945179be46bcc1b07741d180a0fdaa64396994ef Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 13 Sep 2023 14:23:09 +0000
Subject: [PATCH 048/243] Annoy: Fix LOGICAL_ERROR with default values #52258

---
 .../table-engines/mergetree-family/annindexes.md |  8 +++++---
 src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp   | 15 +++++++++++----
 src/Storages/MergeTree/MergeTreeIndexUSearch.cpp | 16 ++++++++++++----
 .../0_stateless/02354_annoy_index.reference      |  1 +
 tests/queries/0_stateless/02354_annoy_index.sql  | 12 ++++++++++++
 .../0_stateless/02354_usearch_index.reference    |  1 +
 .../queries/0_stateless/02354_usearch_index.sql  | 14 ++++++++++++++
 7 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md
index 8996133f667..d6ff7f23bb4 100644
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@@ -203,9 +203,10 @@ Parameter `NumTrees` is the number of trees which the algorithm creates (default
 more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
 
 :::note
-Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays **must** have same length. Use
-[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1
-CHECK length(vectors) = 256`.
+Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays must have same length. To avoid
+errors, you can use a [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT
+constraint_name_1 CHECK length(vectors) = 256`. Also, unspecified `Array` values in INSERT statements (i.e. default values) are not
+supported.
 :::
 
 Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
@@ -223,6 +224,7 @@ SETTINGS annoy_index_search_k_nodes=100;
 The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
 [here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
 :::
+
 ## USearch {#usearch}
 
 This type of ANN index is based on the [the USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index 15830513162..f00f11359e1 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -157,18 +157,25 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
         const auto & column_array_data = column_array->getData();
         const auto & column_arary_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
 
-        if (column_arary_data_float_data.empty())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read);
-
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
+        /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement
+        /// for the annoy-indexed column (i.e. default value), we have a problem. Reject such values.
+        if (column_array_offsets.empty() || column_array_offsets[0] == 0)
+            /// (The if condition is a bit weird but I have seen either with default values)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into Annoy index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read);
+
         /// Check all sizes are the same
         size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
             if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
                 throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
 
+        /// Also check that previously inserted blocks have the same size as this block
+        if (index && index->getDimensions() != dimension)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+
         if (!index)
             index = std::make_shared<AnnoyIndexWithSerialization<Distance>>(dimension);
 
@@ -363,7 +370,7 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
     {
         throw Exception(
             ErrorCodes::ILLEGAL_COLUMN,
-            "Annoy indexes can only be created on columns of type Array(Float32) and Tuple(Float32)");
+            "Annoy indexes can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])");
     };
 
     DataTypePtr data_type = index.sample_block.getDataTypes()[0];
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index de556eb7e07..9531b9188bf 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -176,18 +176,25 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
         const auto & column_array_data = column_array->getData();
         const auto & column_array_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
 
-        if (column_array_data_float_data.empty())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array has 0 rows, {} rows expected", rows_read);
-
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
+        /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement
+        /// for the usearch-indexed column (i.e. default value), we have a problem. Reject such values.
+        if (column_array_offsets.empty() || column_array_offsets[0] == 0)
+            /// (The if condition is a bit weird but I have seen either with default values)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into usearch index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read);
+
         /// Check all sizes are the same
         size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
             if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
                 throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
 
+        /// Also check that previously inserted blocks have the same size as this block
+        if (index && index->getDimensions() != dimension)
+            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+
         if (!index)
             index = std::make_shared<USearchIndexWithSerialization<Metric>>(dimension, scalar_kind);
 
@@ -413,7 +420,8 @@ void usearchIndexValidator(const IndexDescription & index, bool /* attach */)
     auto throw_unsupported_underlying_column_exception = []()
     {
         throw Exception(
-            ErrorCodes::ILLEGAL_COLUMN, "USearch indexes can only be created on columns of type Array(Float32) and Tuple(Float32)");
+            ErrorCodes::ILLEGAL_COLUMN,
+            "USearch can only be created on columns of type Array(Float32) and Tuple(Float32[, Float32[, ...]])");
     };
 
     DataTypePtr data_type = index.sample_block.getDataTypes()[0];
diff --git a/tests/queries/0_stateless/02354_annoy_index.reference b/tests/queries/0_stateless/02354_annoy_index.reference
index a0ffb1e1f7f..81f2ff8aa59 100644
--- a/tests/queries/0_stateless/02354_annoy_index.reference
+++ b/tests/queries/0_stateless/02354_annoy_index.reference
@@ -147,3 +147,4 @@ Expression (Projection)
 9000	[9000,0,0,0]
 1	(1,0,0,0)
 9000	(9000,0,0,0)
+--- Bugs ---
diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql
index eab7a62c5f0..67ef64cc301 100644
--- a/tests/queries/0_stateless/02354_annoy_index.sql
+++ b/tests/queries/0_stateless/02354_annoy_index.sql
@@ -281,3 +281,15 @@ ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0))
 LIMIT 1;
 
 DROP TABLE tab;
+
+SELECT '--- Bugs ---';
+
+-- Arrays with default values are rejected, issue #52258
+CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid);
+INSERT INTO tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
+-- Tuples with default value work
+CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid);
+INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference
index 893a092a386..c2791e99a54 100644
--- a/tests/queries/0_stateless/02354_usearch_index.reference
+++ b/tests/queries/0_stateless/02354_usearch_index.reference
@@ -150,3 +150,4 @@ Expression (Projection)
 1	[0,0,10]
 2	[0,0,10.5]
 3	[0,0,9.5]
+--- Bugs ---
diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql
index e534c91b615..fc2954d6c5d 100644
--- a/tests/queries/0_stateless/02354_usearch_index.sql
+++ b/tests/queries/0_stateless/02354_usearch_index.sql
@@ -274,3 +274,17 @@ SELECT *
 FROM tab
 WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
 LIMIT 3;
+
+DROP TABLE tab;
+
+SELECT '--- Bugs ---';
+
+-- Arrays with default values are rejected, issue #52258
+CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid);
+INSERT INTO tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
+-- Tuples with default value work
+CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid);
+INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine
+DROP TABLE tab;

From 27ee17ab02628113450bf41b3c21cedcd5e141a6 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 12 Sep 2023 18:06:57 +0000
Subject: [PATCH 049/243] Better integer types inference for Int64/UInt64
 fields

---
 src/DataTypes/DataTypeUInt64OrInt64.h         | 13 +++++
 src/DataTypes/DataTypesNumber.h               | 23 +++++++-
 src/DataTypes/FieldToDataType.cpp             | 52 ++-----------------
 src/DataTypes/FieldToDataType.h               | 10 ----
 src/DataTypes/getLeastSupertype.cpp           | 31 +++++++++++
 .../02832_integer_type_inference.reference    |  8 +++
 .../02832_integer_type_inference.sql          |  9 ++++
 7 files changed, 87 insertions(+), 59 deletions(-)
 create mode 100644 src/DataTypes/DataTypeUInt64OrInt64.h

diff --git a/src/DataTypes/DataTypeUInt64OrInt64.h b/src/DataTypes/DataTypeUInt64OrInt64.h
new file mode 100644
index 00000000000..8a11e42cde1
--- /dev/null
+++ b/src/DataTypes/DataTypeUInt64OrInt64.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <DataTypes/DataTypesNumber.h>
+
+namespace DB
+{
+
+class DataTypeUInt64OrInt64 : public DataTypeUInt64
+{
+
+};
+
+}
diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h
index 5843086248c..92f01579459 100644
--- a/src/DataTypes/DataTypesNumber.h
+++ b/src/DataTypes/DataTypesNumber.h
@@ -9,10 +9,17 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
 template <typename T>
-class DataTypeNumber final : public DataTypeNumberBase<T>
+class DataTypeNumber : public DataTypeNumberBase<T>
 {
 public:
+    DataTypeNumber() = default;
+
     bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
 
     bool canBeUsedAsVersion() const override { return true; }
@@ -32,6 +39,20 @@ public:
     {
         return std::make_shared<SerializationNumber<T>>();
     }
+
+    /// Special constructor for unsigned integers that can also fit into signed integer.
+    /// It's used for better type inference from fields.
+    /// See getLeastSupertype.cpp::convertUInt64toInt64IfPossible and FieldToDataType.cpp
+    explicit DataTypeNumber(bool unsigned_can_be_signed_) : DataTypeNumberBase<T>(), unsigned_can_be_signed(unsigned_can_be_signed_)
+    {
+        if constexpr (std::is_signed_v<T>)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "DataTypeNumber constructor with bool argument should not be used with signed integers");
+    }
+
+    bool canUnsignedBeSigned() const { return unsigned_can_be_signed; }
+
+private:
+    bool unsigned_can_be_signed = false;
 };
 
 using DataTypeUInt8 = DataTypeNumber<UInt8>;
diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp
index 837aae6753a..0131a3956f4 100644
--- a/src/DataTypes/FieldToDataType.cpp
+++ b/src/DataTypes/FieldToDataType.cpp
@@ -36,6 +36,7 @@ DataTypePtr FieldToDataType<on_error>::operator() (const UInt64 & x) const
     if (x <= std::numeric_limits<UInt8>::max()) return std::make_shared<DataTypeUInt8>();
     if (x <= std::numeric_limits<UInt16>::max()) return std::make_shared<DataTypeUInt16>();
     if (x <= std::numeric_limits<UInt32>::max()) return std::make_shared<DataTypeUInt32>();
+    if (x <= std::numeric_limits<Int64>::max()) return std::make_shared<DataTypeUInt64>(/*unsigned_can_be_signed=*/true);
     return std::make_shared<DataTypeUInt64>();
 }
 
@@ -136,17 +137,8 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
     DataTypes element_types;
     element_types.reserve(x.size());
 
-    bool has_signed_int = false;
-    bool uint64_convert_possible = true;
     for (const Field & elem : x)
-    {
-        DataTypePtr type = applyVisitor(*this, elem);
-        element_types.emplace_back(type);
-        checkUInt64ToIn64Conversion(has_signed_int, uint64_convert_possible, type, elem);
-    }
-
-    if (has_signed_int && uint64_convert_possible)
-        convertUInt64ToInt64IfPossible(element_types);
+        element_types.emplace_back(applyVisitor(*this, elem));
 
     return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(element_types));
 }
@@ -174,28 +166,14 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Map & map) const
     key_types.reserve(map.size());
     value_types.reserve(map.size());
 
-    bool k_has_signed_int = false;
-    bool k_uint64_convert_possible = true;
-    bool v_has_signed_int = false;
-    bool v_uint64_convert_possible = true;
     for (const auto & elem : map)
     {
         const auto & tuple = elem.safeGet<const Tuple &>();
         assert(tuple.size() == 2);
-        DataTypePtr k_type = applyVisitor(*this, tuple[0]);
-        key_types.push_back(k_type);
-        checkUInt64ToIn64Conversion(k_has_signed_int, k_uint64_convert_possible, k_type, tuple[0]);
-        DataTypePtr v_type = applyVisitor(*this, tuple[1]);
-        value_types.push_back(v_type);
-        checkUInt64ToIn64Conversion(v_has_signed_int, v_uint64_convert_possible, v_type, tuple[1]);
+        key_types.push_back(applyVisitor(*this, tuple[0]));
+        value_types.push_back(applyVisitor(*this, tuple[1]));
     }
 
-    if (k_has_signed_int && k_uint64_convert_possible)
-        convertUInt64ToInt64IfPossible(key_types);
-
-    if (v_has_signed_int && v_uint64_convert_possible)
-        convertUInt64ToInt64IfPossible(value_types);
-
     return std::make_shared<DataTypeMap>(
         getLeastSupertype<on_error>(key_types),
         getLeastSupertype<on_error>(value_types));
@@ -227,28 +205,6 @@ DataTypePtr FieldToDataType<on_error>::operator()(const bool &) const
     return DataTypeFactory::instance().get("Bool");
 }
 
-template <LeastSupertypeOnError on_error>
-void FieldToDataType<on_error>::checkUInt64ToIn64Conversion(bool & has_signed_int, bool & uint64_convert_possible, const DataTypePtr & type, const Field & elem) const
-{
-    if (uint64_convert_possible)
-    {
-        bool is_native_int = WhichDataType(type).isNativeInt();
-
-        if (is_native_int)
-            has_signed_int |= is_native_int;
-        else if (type->getTypeId() == TypeIndex::UInt64)
-            uint64_convert_possible &= (elem.template get<UInt64>() <= std::numeric_limits<Int64>::max());
-    }
-}
-
-template <LeastSupertypeOnError on_error>
-void FieldToDataType<on_error>::convertUInt64ToInt64IfPossible(DataTypes & data_types) const
-{
-    for (auto& type : data_types)
-        if (type->getTypeId() == TypeIndex::UInt64)
-            type = std::make_shared<DataTypeInt64>();
-}
-
 template class FieldToDataType<LeastSupertypeOnError::Throw>;
 template class FieldToDataType<LeastSupertypeOnError::String>;
 template class FieldToDataType<LeastSupertypeOnError::Null>;
diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h
index d1a3f11e8de..8febadc1a0d 100644
--- a/src/DataTypes/FieldToDataType.h
+++ b/src/DataTypes/FieldToDataType.h
@@ -45,16 +45,6 @@ public:
     DataTypePtr operator() (const UInt256 & x) const;
     DataTypePtr operator() (const Int256 & x) const;
     DataTypePtr operator() (const bool & x) const;
-
-private:
-    // The conditions for converting UInt64 to Int64 are:
-    // 1. The existence of Int.
-    // 2. The existence of UInt64, and the UInt64 value must be <= Int64.max.
-    void checkUInt64ToIn64Conversion(bool& has_signed_int, bool& uint64_convert_possible, const DataTypePtr & type, const Field & elem) const;
-
-    // Convert the UInt64 type to Int64 in order to cover other signed_integer types
-    // and obtain the least super type of all ints.
-    void convertUInt64ToInt64IfPossible(DataTypes & data_types) const;
 };
 
 }
diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 9d42d82ce91..e5bdb4b267f 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -198,6 +198,35 @@ DataTypePtr getNumericType(const TypeIndexSet & types)
     return {};
 }
 
+/// Check if we can convert UInt64 to Int64 to avoid error "There is no supertype for types UInt64, Int64"
+/// during inferring field types.
+/// Example:
+/// [-3236599669630092879, 5607475129431807682]
+/// First field is inferred as Int64, but second one as UInt64, although it also can be Int64.
+/// We don't support Int128 as supertype for Int64 and UInt64, because Int128 is inefficient.
+/// But in this case the result type can be inferred as Array(Int64).
+void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & types_set)
+{
+    /// Check if we have UInt64 and at least one Integer type.
+    if (!types_set.contains(TypeIndex::UInt64)
+        || (!types_set.contains(TypeIndex::Int8) && !types_set.contains(TypeIndex::Int16) && !types_set.contains(TypeIndex::Int32)
+            && !types_set.contains(TypeIndex::Int64)))
+        return;
+
+    bool all_uint64_can_be_int64 = true;
+    for (const auto & type : types)
+    {
+        if (const auto * uint64_type = typeid_cast<const DataTypeUInt64 *>(type.get()))
+            all_uint64_can_be_int64 &= uint64_type->canUnsignedBeSigned();
+    }
+
+    if (all_uint64_can_be_int64)
+    {
+        types_set.erase(TypeIndex::UInt64);
+        types_set.insert(TypeIndex::Int64);
+    }
+}
+
 }
 
 template <LeastSupertypeOnError on_error>
@@ -592,6 +621,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types)
 
     /// For numeric types, the most complicated part.
     {
+        /// First, if we have signed integers, try to convert all UInt64 to Int64 if possible.
+        convertUInt64toInt64IfPossible(types, type_ids);
         auto numeric_type = getNumericType<on_error>(type_ids);
         if (numeric_type)
             return numeric_type;
diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference
index 5a01bd4cd11..775fae2e0d2 100644
--- a/tests/queries/0_stateless/02832_integer_type_inference.reference
+++ b/tests/queries/0_stateless/02832_integer_type_inference.reference
@@ -1,2 +1,10 @@
 [-4741124612489978151,-3236599669630092879,5607475129431807682]
 [100,-100,5607475129431807682,5607475129431807683]
+[[-4741124612489978151],[-3236599669630092879,5607475129431807682]]
+[[-4741124612489978151,-3236599669630092879],[5607475129431807682]]
+[(-4741124612489978151,1),(-3236599669630092879,2),(560747512943180768,3)]
+[-4741124612489978151,1,-3236599669630092879,2,560747512943180768,3]
+{-4741124612489978151:1,-3236599669630092879:2,5607475129431807682:3}
+[{-4741124612489978151:1,-3236599669630092879:2,5607475129431807682:3},{-1:1}]
+{1:-4741124612489978151,2:-3236599669630092879,3:5607475129431807682}
+[{1:-4741124612489978151,2:-3236599669630092879,3:5607475129431807682},{-1:1}]
diff --git a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql
index 221e929d705..85ad7f55869 100644
--- a/tests/queries/0_stateless/02832_integer_type_inference.sql
+++ b/tests/queries/0_stateless/02832_integer_type_inference.sql
@@ -1,2 +1,11 @@
 select [-4741124612489978151, -3236599669630092879, 5607475129431807682];
 select [100, -100, 5607475129431807682, 5607475129431807683];
+select [[-4741124612489978151], [-3236599669630092879, 5607475129431807682]];
+select [[-4741124612489978151, -3236599669630092879], [5607475129431807682]];
+select [tuple(-4741124612489978151, 1), tuple(-3236599669630092879, 2), tuple(560747512943180768, 3)];
+select array(-4741124612489978151, 1, -3236599669630092879, 2, 560747512943180768, 3);
+select map(-4741124612489978151, 1, -3236599669630092879, 2, 5607475129431807682, 3);
+select [map(-4741124612489978151, 1, -3236599669630092879, 2, 5607475129431807682, 3), map(-1, 1)];
+select map(1, -4741124612489978151, 2, -3236599669630092879, 3, 5607475129431807682);
+select [map(1, -4741124612489978151, 2, -3236599669630092879, 3, 5607475129431807682), map(-1, 1)];
+

From c6884d4cb90e2a38bdeb00d042703305932497cb Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 12 Sep 2023 20:19:02 +0200
Subject: [PATCH 050/243] Remove unused file

---
 src/DataTypes/DataTypeUInt64OrInt64.h | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 src/DataTypes/DataTypeUInt64OrInt64.h

diff --git a/src/DataTypes/DataTypeUInt64OrInt64.h b/src/DataTypes/DataTypeUInt64OrInt64.h
deleted file mode 100644
index 8a11e42cde1..00000000000
--- a/src/DataTypes/DataTypeUInt64OrInt64.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include <DataTypes/DataTypesNumber.h>
-
-namespace DB
-{
-
-class DataTypeUInt64OrInt64 : public DataTypeUInt64
-{
-
-};
-
-}

From e229fd54c9a2adab5905cba2a9f76048cdc2ccf6 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 12 Sep 2023 20:19:36 +0200
Subject: [PATCH 051/243] Return final

---
 src/DataTypes/DataTypesNumber.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h
index 92f01579459..63d98dbf0f8 100644
--- a/src/DataTypes/DataTypesNumber.h
+++ b/src/DataTypes/DataTypesNumber.h
@@ -15,7 +15,7 @@ namespace ErrorCodes
 }
 
 template <typename T>
-class DataTypeNumber : public DataTypeNumberBase<T>
+class DataTypeNumber final : public DataTypeNumberBase<T>
 {
 public:
     DataTypeNumber() = default;

From 68c6bd882663ed6070d0cbbb0f6c62b5ee0e7ddc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <benjamin.antal@clickhouse.com>
Date: Fri, 15 Sep 2023 12:04:22 +0000
Subject: [PATCH 052/243] Make the failed message single line to fix result
 parsing

---
 src/Parsers/tests/gtest_Parser.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index 18e91c533e0..57588d1d5e5 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -39,7 +39,9 @@ std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr<IParser> pa
 
 std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case)
 {
-    return ostr << "ParserTestCase input: " << test_case.input_text;
+    std::string input_text{test_case.input_text};
+    std::replace(input_text.begin(), input_text.end(),'\n', ' ');
+    return ostr << "ParserTestCase input: " << input_text;
 }
 
 class ParserTest : public ::testing::TestWithParam<std::tuple<std::shared_ptr<IParser>, ParserTestCase>>

From 358f22d18a57ef85481742c45cb67c6c4515ac87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <benjamin.antal@clickhouse.com>
Date: Fri, 15 Sep 2023 12:31:30 +0000
Subject: [PATCH 053/243] Update PRQL unit tests to latest PRQL version

---
 src/Parsers/tests/gtest_Parser.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index 57588d1d5e5..a20d6b2f111 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -496,11 +496,11 @@ INSTANTIATE_TEST_SUITE_P(
         ::testing::Values(std::make_shared<ParserPRQLQuery>(kDummyMaxQuerySize, kDummyMaxParserDepth)),
         ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
             {
-                "from albums\ngroup [author_id] (\n  aggregate [first_pushlied = min published]\n)\njoin a=author side:left [==author_id]\njoin p=purchases side:right [==author_id]\ngroup [a.id, p.purchase_id] (\n  aggregate [avg_sell = min first_pushlied]\n)",
-                "WITH table_1 AS\n    (\n        SELECT\n            MIN(published) AS _expr_0,\n            author_id\n        FROM albums\n        GROUP BY author_id\n    )\nSELECT\n    a.id,\n    p.purchase_id,\n    MIN(table_0._expr_0) AS avg_sell\nFROM table_1 AS table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n    a.id,\n    p.purchase_id",
+                "from albums\ngroup {author_id} (\n  aggregate {first_published = min published}\n)\njoin a=author side:left (==author_id)\njoin p=purchases side:right (==author_id)\ngroup {a.id, p.purchase_id} (\n  aggregate {avg_sell = min first_published}\n)",
+                "WITH table_0 AS\n    (\n        SELECT\n            MIN(published) AS _expr_0,\n            author_id\n        FROM albums\n        GROUP BY author_id\n    )\nSELECT\n    a.id,\n    p.purchase_id,\n    MIN(table_0._expr_0) AS avg_sell\nFROM table_0\nLEFT JOIN author AS a ON table_0.author_id = a.author_id\nRIGHT JOIN purchases AS p ON table_0.author_id = p.author_id\nGROUP BY\n    a.id,\n    p.purchase_id",
             },
             {
-                "from matches\nfilter start_date > @2023-05-30                 # Some comment here\nderive [\n  some_derived_value_1 = a + (b ?? 0),          # And there\n  some_derived_value_2 = c + some_derived_value\n]\nfilter some_derived_value_2 > 0\ngroup [country, city] (\n  aggregate [\n    average some_derived_value_2,\n    aggr = max some_derived_value_2,\n  ]\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort [aggr, -country]\ntake 1..20",
-                "WITH\n    table_3 AS\n    (\n        SELECT\n            country,\n            city,\n            c + some_derived_value AS _expr_1\n        FROM matches\n        WHERE start_date > toDate('2023-05-30')\n    ),\n    table_1 AS\n    (\n        SELECT\n            country,\n            city,\n            AVG(_expr_1) AS _expr_0,\n            MAX(_expr_1) AS aggr\n        FROM table_3 AS table_2\n        WHERE _expr_1 > 0\n        GROUP BY\n            country,\n            city\n    )\nSELECT\n    country,\n    city,\n    _expr_0,\n    aggr,\n    CONCAT(city, ' in ', country) AS place,\n    LEFT(country, 2) AS country_code\nFROM table_1 AS table_0\nORDER BY\n    aggr ASC,\n    country DESC\nLIMIT 20",
+                "from matches\nfilter start_date > @2023-05-30                 # Some comment here\nderive {\n  some_derived_value_1 = a + (b ?? 0),          # And there\n  some_derived_value_2 = c + some_derived_value\n}\nfilter some_derived_value_2 > 0\ngroup {country, city} (\n  aggregate {\n    average some_derived_value_2,\n    aggr = max some_derived_value_2\n  }\n)\nderive place = f\"{city} in {country}\"\nderive country_code = s\"LEFT(country, 2)\"\nsort {aggr, -country}\ntake 1..20",
+                "WITH\n    table_1 AS\n    (\n        SELECT\n            country,\n            city,\n            c + some_derived_value AS _expr_1\n        FROM matches\n        WHERE start_date > toDate('2023-05-30')\n    ),\n    table_0 AS\n    (\n        SELECT\n            country,\n            city,\n            AVG(_expr_1) AS _expr_0,\n            MAX(_expr_1) AS aggr\n        FROM table_1\n        WHERE _expr_1 > 0\n        GROUP BY\n            country,\n            city\n    )\nSELECT\n    country,\n    city,\n    _expr_0,\n    aggr,\n    CONCAT(city, ' in ', country) AS place,\n    LEFT(country, 2) AS country_code\nFROM table_0\nORDER BY\n    aggr ASC,\n    country DESC\nLIMIT 20",
             },
         })));

From 2e8ac2bdf9718e2f0b7e09af134cad7911b730db Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 15 Sep 2023 12:53:50 +0000
Subject: [PATCH 054/243] Fix function if

---
 src/DataTypes/DataTypesNumber.cpp                         | 6 ++++++
 src/DataTypes/DataTypesNumber.h                           | 2 ++
 src/Functions/if.cpp                                      | 8 ++++++++
 tests/queries/0_stateless/01065_if_not_finite.sql         | 2 +-
 .../0_stateless/02832_integer_type_inference.reference    | 1 +
 .../queries/0_stateless/02832_integer_type_inference.sql  | 2 +-
 6 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp
index 232a5101cbe..1c0c418411b 100644
--- a/src/DataTypes/DataTypesNumber.cpp
+++ b/src/DataTypes/DataTypesNumber.cpp
@@ -35,6 +35,12 @@ static DataTypePtr createNumericDataType(const ASTPtr & arguments)
     return std::make_shared<DataTypeNumber<T>>();
 }
 
+bool isUInt64ThatCanBeInt64(const DataTypePtr & type)
+{
+    const DataTypeUInt64 * uint64_type = typeid_cast<const DataTypeUInt64 *>(type.get());
+    return uint64_type && uint64_type->canUnsignedBeSigned();
+}
+
 
 void registerDataTypeNumbers(DataTypeFactory & factory)
 {
diff --git a/src/DataTypes/DataTypesNumber.h b/src/DataTypes/DataTypesNumber.h
index 63d98dbf0f8..0c1f88a7925 100644
--- a/src/DataTypes/DataTypesNumber.h
+++ b/src/DataTypes/DataTypesNumber.h
@@ -71,4 +71,6 @@ using DataTypeInt128 = DataTypeNumber<Int128>;
 using DataTypeUInt256 = DataTypeNumber<UInt256>;
 using DataTypeInt256 = DataTypeNumber<Int256>;
 
+bool isUInt64ThatCanBeInt64(const DataTypePtr & type);
+
 }
diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp
index 65e2212e894..a955230f3d3 100644
--- a/src/Functions/if.cpp
+++ b/src/Functions/if.cpp
@@ -1105,6 +1105,14 @@ public:
         if (const auto * right_array = checkAndGetDataType<DataTypeArray>(arg_else.type.get()))
             right_id = right_array->getNestedType()->getTypeId();
 
+        /// Special case when one column is Integer and another is UInt64 that can be actually Int64.
+        /// The result type for this case is Int64 and we need to change UInt64 type to Int64
+        /// so the NumberTraits::ResultOfIf will return Int64 instead if Int128.
+        if (isNativeInteger(arg_then.type) && isUInt64ThatCanBeInt64(arg_else.type))
+            right_id = TypeIndex::Int64;
+        else if (isNativeInteger(arg_else.type) && isUInt64ThatCanBeInt64(arg_then.type))
+            left_id = TypeIndex::Int64;
+
         if (!(callOnBasicTypes<true, true, true, false>(left_id, right_id, call)
             || (res = executeTyped<UUID, UUID>(cond_col, arguments, result_type, input_rows_count))
             || (res = executeString(cond_col, arguments, result_type))
diff --git a/tests/queries/0_stateless/01065_if_not_finite.sql b/tests/queries/0_stateless/01065_if_not_finite.sql
index 495932692ea..c0f0721b2dc 100644
--- a/tests/queries/0_stateless/01065_if_not_finite.sql
+++ b/tests/queries/0_stateless/01065_if_not_finite.sql
@@ -6,6 +6,6 @@ SELECT ifNotFinite(nan, 2);
 SELECT ifNotFinite(-1 / 0, 2);
 SELECT ifNotFinite(log(0), NULL);
 SELECT ifNotFinite(sqrt(-1), -42);
-SELECT ifNotFinite(1234567890123456789, -1234567890123456789); -- { serverError 386 }
+SELECT ifNotFinite(12345678901234567890, -12345678901234567890); -- { serverError 386 }
 
 SELECT ifNotFinite(NULL, 1);
diff --git a/tests/queries/0_stateless/02832_integer_type_inference.reference b/tests/queries/0_stateless/02832_integer_type_inference.reference
index 775fae2e0d2..bc738afd538 100644
--- a/tests/queries/0_stateless/02832_integer_type_inference.reference
+++ b/tests/queries/0_stateless/02832_integer_type_inference.reference
@@ -8,3 +8,4 @@
 [{-4741124612489978151:1,-3236599669630092879:2,5607475129431807682:3},{-1:1}]
 {1:-4741124612489978151,2:-3236599669630092879,3:5607475129431807682}
 [{1:-4741124612489978151,2:-3236599669630092879,3:5607475129431807682},{-1:1}]
+-1234567890123456789
diff --git a/tests/queries/0_stateless/02832_integer_type_inference.sql b/tests/queries/0_stateless/02832_integer_type_inference.sql
index 85ad7f55869..c6e7c744f39 100644
--- a/tests/queries/0_stateless/02832_integer_type_inference.sql
+++ b/tests/queries/0_stateless/02832_integer_type_inference.sql
@@ -8,4 +8,4 @@ select map(-4741124612489978151, 1, -3236599669630092879, 2, 5607475129431807682
 select [map(-4741124612489978151, 1, -3236599669630092879, 2, 5607475129431807682, 3), map(-1, 1)];
 select map(1, -4741124612489978151, 2, -3236599669630092879, 3, 5607475129431807682);
 select [map(1, -4741124612489978151, 2, -3236599669630092879, 3, 5607475129431807682), map(-1, 1)];
-
+select if(materialize(1), -1234567890123456789, 1234567890123456789);

From 32cad222bf4875075d7a7e41c6095b8ae11109a3 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 12 Sep 2023 17:10:57 +0300
Subject: [PATCH 055/243] KeyCondition IN function support different types

---
 src/Storages/MergeTree/KeyCondition.cpp       | 48 ++++++++--
 src/Storages/MergeTree/RPNBuilder.cpp         | 58 ------------
 src/Storages/MergeTree/RPNBuilder.h           |  5 --
 ...ndex_in_function_different_types.reference | 88 +++++++++++++++++++
 ..._key_index_in_function_different_types.sql | 24 +++++
 5 files changed, 155 insertions(+), 68 deletions(-)
 create mode 100644 tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference
 create mode 100644 tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 42731bac19b..bea0111fdf3 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -1253,10 +1253,21 @@ bool KeyCondition::tryPrepareSetIndex(
 
     const auto right_arg = func.getArgumentAt(1);
 
-    auto future_set = right_arg.tryGetPreparedSet(indexes_mapping, data_types);
+    auto future_set = right_arg.tryGetPreparedSet();
     if (!future_set)
         return false;
 
+    const auto & set_types = future_set->getTypes();
+    size_t set_types_size = set_types.size();
+    size_t indexes_mapping_size = indexes_mapping.size();
+
+    if (set_types_size != indexes_mapping_size)
+        return false;
+
+    for (auto & index_mapping : indexes_mapping)
+        if (index_mapping.tuple_index >= set_types_size)
+            return false;
+
     auto prepared_set = future_set->buildOrderedSetInplace(right_arg.getTreeContext().getQueryContext());
     if (!prepared_set)
         return false;
@@ -1265,11 +1276,38 @@ bool KeyCondition::tryPrepareSetIndex(
     if (!prepared_set->hasExplicitSetElements())
         return false;
 
-    prepared_set->checkColumnsNumber(left_args_count);
-    for (size_t i = 0; i < indexes_mapping.size(); ++i)
-        prepared_set->checkTypesEqual(indexes_mapping[i].tuple_index, data_types[i]);
+    /** Try to convert set columns to primary key columns.
+      * Example: SELECT id FROM test_table WHERE id IN (SELECT 1);
+      * In this example table `id` column has type UInt64, Set column has type UInt8. To use index
+      * we need to convert set column to primary key column.
+      */
+    const auto & set_elements = prepared_set->getSetElements();
+    size_t set_elements_size = set_elements.size();
+    assert(set_types_size == set_elements_size);
 
-    out.set_index = std::make_shared<MergeTreeSetIndex>(prepared_set->getSetElements(), std::move(indexes_mapping));
+    Columns set_columns;
+    set_columns.reserve(set_elements_size);
+
+    for (size_t i = 0; i < indexes_mapping_size; ++i)
+    {
+        size_t set_element_index = indexes_mapping[i].tuple_index;
+        const auto & set_element = set_elements[set_element_index];
+        const auto & set_element_type = set_types[set_element_index];
+
+        ColumnPtr set_column;
+        try
+        {
+            set_column = castColumnAccurate({set_element, set_element_type, {}}, data_types[i]);
+        }
+        catch (...)
+        {
+            return false;
+        }
+
+        set_columns.push_back(set_column);
+    }
+
+    out.set_index = std::make_shared<MergeTreeSetIndex>(set_columns, std::move(indexes_mapping));
     return true;
 }
 
diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp
index a0c96c13d59..29bcd8b87f1 100644
--- a/src/Storages/MergeTree/RPNBuilder.cpp
+++ b/src/Storages/MergeTree/RPNBuilder.cpp
@@ -351,64 +351,6 @@ FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet(const DataTypes & data_types)
     return nullptr;
 }
 
-FutureSetPtr RPNBuilderTreeNode::tryGetPreparedSet(
-    const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
-    const DataTypes & data_types) const
-{
-    const auto & prepared_sets = getTreeContext().getPreparedSets();
-
-    /// We have `PreparedSetKey::forLiteral` but it is useless here as we don't have enough information
-    /// about types in left argument of the IN operator. Instead, we manually iterate through all the sets
-    /// and find the one for the right arg based on the AST structure (getTreeHash), after that we check
-    /// that the types it was prepared with are compatible with the types of the primary key.
-    auto types_match = [&indexes_mapping, &data_types](const DataTypes & set_types)
-    {
-        assert(indexes_mapping.size() == data_types.size());
-
-        for (size_t i = 0; i < indexes_mapping.size(); ++i)
-        {
-            if (indexes_mapping[i].tuple_index >= set_types.size())
-                return false;
-
-            auto lhs = removeNullable(recursiveRemoveLowCardinality(data_types[i]));
-            auto rhs = removeNullable(recursiveRemoveLowCardinality(set_types[indexes_mapping[i].tuple_index]));
-
-            if (!lhs->equals(*rhs))
-                return false;
-        }
-
-        return true;
-    };
-
-    if (prepared_sets && ast_node)
-    {
-        if (ast_node->as<ASTSubquery>() || ast_node->as<ASTTableIdentifier>())
-            return prepared_sets->findSubquery(ast_node->getTreeHash());
-
-        auto tree_hash = ast_node->getTreeHash();
-        const auto & sets = prepared_sets->getSetsFromTuple();
-        auto it = sets.find(tree_hash);
-        if (it == sets.end())
-            return nullptr;
-
-        for (const auto & future_set : it->second)
-            if (types_match(future_set->getTypes()))
-                return future_set;
-    }
-    else
-    {
-        const auto * node_without_alias = getNodeWithoutAlias(dag_node);
-        if (node_without_alias->column)
-        {
-            auto future_set = tryGetSetFromDAGNode(node_without_alias);
-            if (types_match(future_set->getTypes()))
-                return future_set;
-        }
-    }
-
-    return nullptr;
-}
-
 RPNBuilderFunctionTreeNode RPNBuilderTreeNode::toFunctionNode() const
 {
     if (!isFunction())
diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h
index 9eeb6deefd5..f14f241cac8 100644
--- a/src/Storages/MergeTree/RPNBuilder.h
+++ b/src/Storages/MergeTree/RPNBuilder.h
@@ -116,11 +116,6 @@ public:
     /// Try get prepared set from node that match data types
     FutureSetPtr tryGetPreparedSet(const DataTypes & data_types) const;
 
-    /// Try get prepared set from node that match indexes mapping and data types
-    FutureSetPtr tryGetPreparedSet(
-        const std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> & indexes_mapping,
-        const DataTypes & data_types) const;
-
     /** Convert node to function node.
       * Node must be function before calling these method, otherwise exception is thrown.
       */
diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference
new file mode 100644
index 00000000000..f34aad737d4
--- /dev/null
+++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.reference
@@ -0,0 +1,88 @@
+CreatingSets (Create sets before main query execution)
+  Expression ((Projection + Before ORDER BY))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 1-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Projection + Before ORDER BY))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 1-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Projection + Before ORDER BY))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 5-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Projection + Before ORDER BY))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 5-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Project names + Projection))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 1-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Project names + Projection))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 1-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Project names + Projection))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 5-element set))
+        Parts: 1/1
+        Granules: 1/1
+CreatingSets (Create sets before main query execution)
+  Expression ((Project names + Projection))
+    ReadFromMergeTree (default.test_table)
+    Indexes:
+      PrimaryKey
+        Keys: 
+          id
+          value
+        Condition: and((id in (-Inf, 10]), (value in 5-element set))
+        Parts: 1/1
+        Granules: 1/1
diff --git a/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql
new file mode 100644
index 00000000000..648a8041b71
--- /dev/null
+++ b/tests/queries/0_stateless/02882_primary_key_index_in_function_different_types.sql
@@ -0,0 +1,24 @@
+DROP TABLE IF EXISTS test_table;
+CREATE TABLE test_table
+(
+    id UInt64,
+    value UInt64
+) ENGINE=MergeTree ORDER BY (id, value);
+
+INSERT INTO test_table SELECT number, number FROM numbers(10);
+
+SET allow_experimental_analyzer = 0;
+
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
+
+SET allow_experimental_analyzer = 1;
+
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
+EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
+
+DROP TABLE test_table;

From 60a63212c876995c7d7b458e767f87f7a6847c76 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 12 Sep 2023 21:26:53 +0300
Subject: [PATCH 056/243] Fixed tests

---
 src/Interpreters/PreparedSets.cpp | 11 +++++++----
 src/Interpreters/PreparedSets.h   |  8 ++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp
index e0551dff2ad..955d8892284 100644
--- a/src/Interpreters/PreparedSets.cpp
+++ b/src/Interpreters/PreparedSets.cpp
@@ -48,7 +48,7 @@ static bool equals(const DataTypes & lhs, const DataTypes & rhs)
 
 FutureSetFromStorage::FutureSetFromStorage(SetPtr set_) : set(std::move(set_)) {}
 SetPtr FutureSetFromStorage::get() const { return set; }
-const DataTypes & FutureSetFromStorage::getTypes() const { return set->getElementsTypes(); }
+DataTypes FutureSetFromStorage::getTypes() const { return set->getElementsTypes(); }
 
 SetPtr FutureSetFromStorage::buildOrderedSetInplace(const ContextPtr &)
 {
@@ -73,7 +73,7 @@ FutureSetFromTuple::FutureSetFromTuple(Block block, const Settings & settings)
     set->finishInsert();
 }
 
-const DataTypes & FutureSetFromTuple::getTypes() const { return set->getElementsTypes(); }
+DataTypes FutureSetFromTuple::getTypes() const { return set->getElementsTypes(); }
 
 SetPtr FutureSetFromTuple::buildOrderedSetInplace(const ContextPtr & context)
 {
@@ -138,7 +138,7 @@ void FutureSetFromSubquery::setQueryPlan(std::unique_ptr<QueryPlan> source_)
     set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName());
 }
 
-const DataTypes & FutureSetFromSubquery::getTypes() const
+DataTypes FutureSetFromSubquery::getTypes() const
 {
     return set_and_key->set->getElementsTypes();
 }
@@ -183,7 +183,10 @@ SetPtr FutureSetFromSubquery::buildOrderedSetInplace(const ContextPtr & context)
     {
         auto set = external_table_set->buildOrderedSetInplace(context);
         if (set)
-            return set_and_key->set = set;
+        {
+            set_and_key->set = set;
+            return set_and_key->set;
+        }
     }
 
     auto plan = build(context);
diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h
index b953b8470e1..e237789c63c 100644
--- a/src/Interpreters/PreparedSets.h
+++ b/src/Interpreters/PreparedSets.h
@@ -47,7 +47,7 @@ public:
     /// Returns set if set is ready (created and filled) or nullptr if not.
     virtual SetPtr get() const = 0;
     /// Returns set->getElementsTypes(), even if set is not created yet.
-    virtual const DataTypes & getTypes() const = 0;
+    virtual DataTypes getTypes() const = 0;
     /// If possible, return set with stored elements useful for PK analysis.
     virtual SetPtr buildOrderedSetInplace(const ContextPtr & context) = 0;
 };
@@ -62,7 +62,7 @@ public:
     FutureSetFromStorage(SetPtr set_);
 
     SetPtr get() const override;
-    const DataTypes & getTypes() const override;
+    DataTypes getTypes() const override;
     SetPtr buildOrderedSetInplace(const ContextPtr &) override;
 
 private:
@@ -79,7 +79,7 @@ public:
     SetPtr get() const override { return set; }
     SetPtr buildOrderedSetInplace(const ContextPtr & context) override;
 
-    const DataTypes & getTypes() const override;
+    DataTypes getTypes() const override;
 
 private:
     SetPtr set;
@@ -105,7 +105,7 @@ public:
         const Settings & settings);
 
     SetPtr get() const override;
-    const DataTypes & getTypes() const override;
+    DataTypes getTypes() const override;
     SetPtr buildOrderedSetInplace(const ContextPtr & context) override;
 
     std::unique_ptr<QueryPlan> build(const ContextPtr & context);

From 7a7697059c6b5fb8fb0925429f2d3cf0cc31f8c9 Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Wed, 13 Sep 2023 17:44:38 +0300
Subject: [PATCH 057/243] Fixed tests

---
 src/Storages/MergeTree/KeyCondition.cpp | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index bea0111fdf3..53b7e170ea5 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -1261,9 +1261,6 @@ bool KeyCondition::tryPrepareSetIndex(
     size_t set_types_size = set_types.size();
     size_t indexes_mapping_size = indexes_mapping.size();
 
-    if (set_types_size != indexes_mapping_size)
-        return false;
-
     for (auto & index_mapping : indexes_mapping)
         if (index_mapping.tuple_index >= set_types_size)
             return false;
@@ -1281,33 +1278,26 @@ bool KeyCondition::tryPrepareSetIndex(
       * In this example table `id` column has type UInt64, Set column has type UInt8. To use index
       * we need to convert set column to primary key column.
       */
-    const auto & set_elements = prepared_set->getSetElements();
-    size_t set_elements_size = set_elements.size();
-    assert(set_types_size == set_elements_size);
-
-    Columns set_columns;
-    set_columns.reserve(set_elements_size);
+    auto set_elements = prepared_set->getSetElements();
+    assert(set_types_size == set_elements.size());
 
     for (size_t i = 0; i < indexes_mapping_size; ++i)
     {
         size_t set_element_index = indexes_mapping[i].tuple_index;
-        const auto & set_element = set_elements[set_element_index];
         const auto & set_element_type = set_types[set_element_index];
+        auto & set_element = set_elements[set_element_index];
 
-        ColumnPtr set_column;
         try
         {
-            set_column = castColumnAccurate({set_element, set_element_type, {}}, data_types[i]);
+            set_element = castColumnAccurate({set_element, set_element_type, {}}, data_types[i]);
         }
         catch (...)
         {
             return false;
         }
-
-        set_columns.push_back(set_column);
     }
 
-    out.set_index = std::make_shared<MergeTreeSetIndex>(set_columns, std::move(indexes_mapping));
+    out.set_index = std::make_shared<MergeTreeSetIndex>(set_elements, std::move(indexes_mapping));
     return true;
 }
 

From 29e4352c17d49e317914fae7e7bd01cceed62d1b Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Thu, 14 Sep 2023 19:54:39 +0300
Subject: [PATCH 058/243] Updated implementation

---
 src/Storages/MergeTree/KeyCondition.cpp | 46 +++++++++++++++++++------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 53b7e170ea5..47521b9887b 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -1278,26 +1278,52 @@ bool KeyCondition::tryPrepareSetIndex(
       * In this example table `id` column has type UInt64, Set column has type UInt8. To use index
       * we need to convert set column to primary key column.
       */
-    auto set_elements = prepared_set->getSetElements();
-    assert(set_types_size == set_elements.size());
+    auto set_columns = prepared_set->getSetElements();
+    assert(set_types_size == set_columns.size());
 
-    for (size_t i = 0; i < indexes_mapping_size; ++i)
+    for (size_t indexes_mapping_index = 0; indexes_mapping_index < indexes_mapping_size; ++indexes_mapping_index)
     {
-        size_t set_element_index = indexes_mapping[i].tuple_index;
+        size_t set_element_index = indexes_mapping[indexes_mapping_index].tuple_index;
         const auto & set_element_type = set_types[set_element_index];
-        auto & set_element = set_elements[set_element_index];
+        auto & set_column = set_columns[set_element_index];
 
-        try
+        bool is_set_column_nullable = set_element_type->isNullable();
+        bool is_set_column_low_cardinality_nullable = set_element_type->isLowCardinalityNullable();
+
+        const NullMap * set_column_null_map = nullptr;
+
+        if (is_set_column_nullable || is_set_column_low_cardinality_nullable)
         {
-            set_element = castColumnAccurate({set_element, set_element_type, {}}, data_types[i]);
+            if (is_set_column_low_cardinality_nullable)
+                set_column = set_column->convertToFullColumnIfLowCardinality();
+
+            set_column_null_map = &assert_cast<const ColumnNullable &>(*set_column).getNullMapData();
         }
-        catch (...)
+
+        auto nullable_set_column = castColumnAccurateOrNull({set_column, set_element_type, {}}, data_types[indexes_mapping_index]);
+        const auto & nullable_set_column_typed = assert_cast<const ColumnNullable &>(*nullable_set_column);
+        const auto & nullable_set_column_null_map = nullable_set_column_typed.getNullMapData();
+        size_t nullable_set_column_null_map_size = nullable_set_column_null_map.size();
+
+        IColumn::Filter filter(nullable_set_column_null_map_size);
+
+        if (set_column_null_map)
         {
-            return false;
+            for (size_t i = 0; i < nullable_set_column_null_map_size; ++i)
+                filter[i] = (*set_column_null_map)[i] || !nullable_set_column_null_map[i];
+
+            set_column = nullable_set_column_typed.filter(filter, 0);
+        }
+        else
+        {
+            for (size_t i = 0; i < nullable_set_column_null_map_size; ++i)
+                filter[i] = !nullable_set_column_null_map[i];
+
+            set_column = nullable_set_column_typed.getNestedColumn().filter(filter, 0);
         }
     }
 
-    out.set_index = std::make_shared<MergeTreeSetIndex>(set_elements, std::move(indexes_mapping));
+    out.set_index = std::make_shared<MergeTreeSetIndex>(set_columns, std::move(indexes_mapping));
     return true;
 }
 

From 8ef910e1fa0c4d8d6053b111efab5b4aecaaffb2 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni <Smita.Kulkarni@clickhouse.com>
Date: Fri, 15 Sep 2023 22:34:09 +0200
Subject: [PATCH 059/243] Updated to check if cleanup is enabled before adding
 to cleanedup_rows_count

---
 .../Algorithms/ReplacingSortedAlgorithm.cpp   |  4 ++--
 ...lacing_merge_tree_vertical_merge.reference |  3 +++
 ...77_replacing_merge_tree_vertical_merge.sql | 21 ++++++++++++++++++-
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
index 4d2443b1e46..18f144bee3d 100644
--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
@@ -83,7 +83,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
                     uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num];
                     if (!cleanup || !value)
                         insertRow();
-                    else if (cleanedup_rows_count != nullptr)
+                    else if (cleanup && cleanedup_rows_count != nullptr)
                         *cleanedup_rows_count += current_row_sources.size();
                 }
                 else
@@ -141,7 +141,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
             uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num];
             if (!cleanup || !value)
                 insertRow();
-            else if (cleanedup_rows_count != nullptr)
+            else if (cleanup && cleanedup_rows_count != nullptr)
                 *cleanedup_rows_count += current_row_sources.size();
         }
         else
diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
index e0f8c3bae3f..6bac6173183 100644
--- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
+++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference
@@ -2,3 +2,6 @@
 2018-01-01	1	1
 2018-01-01	2	2
 2018-01-01	2	2
+== (Replicas) Test optimize ==
+d2	1	0
+d4	1	0
diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
index 8c51a6f34da..931297fdd3b 100644
--- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
+++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
@@ -1,10 +1,29 @@
 set optimize_on_insert = 0;
 
 drop table if exists tab_00577;
-create table tab_00577 (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0;
+create table tab_00577 (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date settings enable_vertical_merge_algorithm = 1,
+    vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0,
+    min_bytes_for_wide_part = 0;
 insert into tab_00577 values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1);
 insert into tab_00577 values ('2018-01-01', 0, 0);
 select * from tab_00577 order by version;
 OPTIMIZE TABLE tab_00577;
 select * from tab_00577;
 drop table tab_00577;
+
+
+DROP TABLE IF EXISTS testCleanupR1;
+CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8)
+    ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted)
+    ORDER BY uid SETTINGS enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0;
+INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0);
+INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1);
+INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1);
+SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet"
+
+OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP;
+
+-- Only d3 to d5 remain
+SELECT '== (Replicas) Test optimize ==';
+SELECT * FROM testCleanupR1 order by uid;
+DROP TABLE IF EXISTS testCleanupR1
\ No newline at end of file

From 9f009cccd5d01be29ff8e8ab6063297ec2a73b46 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 17 Sep 2023 15:22:51 +0000
Subject: [PATCH 060/243] Incorporate review feedback

---
 src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp | 18 ++++++++++--------
 .../MergeTree/MergeTreeIndexUSearch.cpp        | 18 ++++++++++--------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index f00f11359e1..d15d89ad6f9 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -160,21 +160,23 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
-        /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement
-        /// for the annoy-indexed column (i.e. default value), we have a problem. Reject such values.
-        if (column_array_offsets.empty() || column_array_offsets[0] == 0)
-            /// (The if condition is a bit weird but I have seen either with default values)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into Annoy index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read);
+        /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays
+        /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default
+        /// value which is also an empty array.
+        if (column_array->isDefaultAt(0))
+            throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);
 
         /// Check all sizes are the same
         size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
             if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
 
-        /// Also check that previously inserted blocks have the same size as this block
+        /// Also check that previously inserted blocks have the same size as this block.
+        /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across
+        /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42.
         if (index && index->getDimensions() != dimension)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
 
         if (!index)
             index = std::make_shared<AnnoyIndexWithSerialization<Distance>>(dimension);
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index 9531b9188bf..a00cab6ca59 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -179,21 +179,23 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
-        /// The index dimension is inferred from the inserted arrays (array cardinality). If no value was specified in the INSERT statement
-        /// for the usearch-indexed column (i.e. default value), we have a problem. Reject such values.
-        if (column_array_offsets.empty() || column_array_offsets[0] == 0)
-            /// (The if condition is a bit weird but I have seen either with default values)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to insert {} rows into usearch index but there were no values to insert. Likely, the INSERT used default values - these are not supported for Annoy.", rows_read);
+        /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays
+        /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default
+        /// value which is also an empty array.
+        if (column_array->isDefaultAt(0))
+            throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);
 
         /// Check all sizes are the same
         size_t dimension = column_array_offsets[0];
         for (size_t i = 0; i < num_rows - 1; ++i)
             if (column_array_offsets[i + 1] - column_array_offsets[i] != dimension)
-                throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+                throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
 
-        /// Also check that previously inserted blocks have the same size as this block
+        /// Also check that previously inserted blocks have the same size as this block.
+        /// Note that this guarantees consistency of dimension only within parts. We are unable to detect inconsistent dimensions across
+        /// parts - for this, a little help from the user is needed, e.g. CONSTRAINT cnstr CHECK length(array) = 42.
         if (index && index->getDimensions() != dimension)
-            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column {} must have equal length", index_column_name);
+            throw Exception(ErrorCodes::INCORRECT_DATA, "All arrays in column '{}' must have equal length", index_column_name);
 
         if (!index)
             index = std::make_shared<USearchIndexWithSerialization<Metric>>(dimension, scalar_kind);

From de4f22e20aa1c4d044a525474f5ed2d03e6c59a5 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 17 Sep 2023 15:26:36 +0000
Subject: [PATCH 061/243] Typo

---
 src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp   | 4 ++--
 src/Storages/MergeTree/MergeTreeIndexUSearch.cpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index d15d89ad6f9..3eec8614dcd 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -160,9 +160,9 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
-        /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays
+        /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
         /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default
-        /// value which is also an empty array.
+        /// value which is also empty.
         if (column_array->isDefaultAt(0))
             throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index a00cab6ca59..009c004faea 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -179,9 +179,9 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
-        /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 0. This condition is violated if empty arrays
+        /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
         /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default
-        /// value which is also an empty array.
+        /// values which is also empty.
         if (column_array->isDefaultAt(0))
             throw Exception(ErrorCodes::INCORRECT_DATA, "The arrays in column '{}' must not be empty. Did you try to INSERT default values?", index_column_name);
 

From 46fa7dbb8080aa9a2189a1fe0adbe3110f89c11f Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Tue, 5 Sep 2023 12:59:32 +0000
Subject: [PATCH 062/243] add libFuzzer build on 'libFuzzer' label, build
 produces artifacts

---
 .github/workflows/pull_request.yml |  45 +++++++++++
 CMakeLists.txt                     |  22 ++----
 cmake/utils.cmake                  | 120 +++++++++++++++++++++++++++++
 docker/packager/binary/build.sh    |   8 +-
 4 files changed, 173 insertions(+), 22 deletions(-)
 create mode 100644 cmake/utils.cmake

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index ce135846dd5..e6a4d1bf92e 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5059,6 +5059,51 @@ jobs:
           docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
           sudo rm -fr "$TEMP_PATH"
 #############################################################################################
+#################################### libFuzzer build ########################################
+#############################################################################################
+  BuilderFuzzers:
+    if: contains(github.event.pull_request.labels.*.name, 'libFuzzer')
+    needs: [DockerHubPush, FastTest, StyleCheck]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=fuzzers
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v3
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+#############################################################################################
 ###################################### JEPSEN TESTS #########################################
 #############################################################################################
   Jepsen:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 65ff9dc5384..781a9efe64a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,6 +19,7 @@ include (cmake/tools.cmake)
 include (cmake/ccache.cmake)
 include (cmake/clang_tidy.cmake)
 include (cmake/git.cmake)
+include (cmake/utils.cmake)
 
 # Ignore export() since we don't use it,
 # but it gets broken with a global targets via link_libraries()
@@ -562,22 +563,6 @@ add_subdirectory (programs)
 add_subdirectory (tests)
 add_subdirectory (utils)
 
-# Function get_all_targets collects all targets recursively
-function(get_all_targets var)
-    macro(get_all_targets_recursive targets dir)
-        get_property(subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
-        foreach(subdir ${subdirectories})
-            get_all_targets_recursive(${targets} ${subdir})
-        endforeach()
-        get_property(current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
-        list(APPEND ${targets} ${current_targets})
-    endmacro()
-
-    set(targets)
-    get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR})
-    set(${var} ${targets} PARENT_SCOPE)
-endfunction()
-
 if (FUZZER)
     # Bundle fuzzers target
     add_custom_target(fuzzers)
@@ -592,11 +577,14 @@ if (FUZZER)
             # clickhouse fuzzer isn't working correctly
             # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
             #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
-            if (target MATCHES ".+_fuzzer")
+            if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
                 message(STATUS "${target} instrumented with fuzzer")
                 target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
                 # Add to fuzzers bundle
                 add_dependencies(fuzzers ${target})
+                get_target_filename(${target} target_bin_name)
+                get_target_property(target_bin_dir ${target} BINARY_DIR)
+                add_custom_command(TARGET fuzzers POST_BUILD COMMAND mv "${target_bin_dir}/${target_bin_name}" "${CMAKE_CURRENT_BINARY_DIR}/programs/" VERBATIM)
             endif()
         endif()
     endforeach()
diff --git a/cmake/utils.cmake b/cmake/utils.cmake
new file mode 100644
index 00000000000..a318408098a
--- /dev/null
+++ b/cmake/utils.cmake
@@ -0,0 +1,120 @@
+# Useful stuff
+
+# Function get_all_targets collects all targets recursively
+function(get_all_targets outvar)
+    macro(get_all_targets_recursive targets dir)
+        get_property(subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
+        foreach(subdir ${subdirectories})
+            get_all_targets_recursive(${targets} ${subdir})
+        endforeach()
+        get_property(current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
+        list(APPEND ${targets} ${current_targets})
+    endmacro()
+
+    set(targets)
+    get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR})
+    set(${outvar} ${targets} PARENT_SCOPE)
+endfunction()
+
+
+# Function get_target_filename calculates target's output file name
+function(get_target_filename target outvar)
+    get_target_property(prop_type "${target}" TYPE)
+    get_target_property(prop_is_framework "${target}" FRAMEWORK)
+    get_target_property(prop_outname "${target}" OUTPUT_NAME)
+    get_target_property(prop_archive_outname "${target}" ARCHIVE_OUTPUT_NAME)
+    get_target_property(prop_library_outname "${target}" LIBRARY_OUTPUT_NAME)
+    get_target_property(prop_runtime_outname "${target}" RUNTIME_OUTPUT_NAME)
+    # message("prop_archive_outname: ${prop_archive_outname}")    
+    # message("prop_library_outname: ${prop_library_outname}")
+    # message("prop_runtime_outname: ${prop_runtime_outname}")
+    if(DEFINED CMAKE_BUILD_TYPE)
+        get_target_property(prop_cfg_outname "${target}" "${OUTPUT_NAME}_${CMAKE_BUILD_TYPE}")
+        get_target_property(prop_archive_cfg_outname "${target}" "${ARCHIVE_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}")
+        get_target_property(prop_library_cfg_outname "${target}" "${LIBRARY_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}")
+        get_target_property(prop_runtime_cfg_outname "${target}" "${RUNTIME_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}")
+        # message("prop_archive_cfg_outname: ${prop_archive_cfg_outname}")
+        # message("prop_library_cfg_outname: ${prop_library_cfg_outname}")
+        # message("prop_runtime_cfg_outname: ${prop_runtime_cfg_outname}")
+        if(NOT ("${prop_cfg_outname}" STREQUAL "prop_cfg_outname-NOTFOUND"))
+            set(prop_outname "${prop_cfg_outname}")
+        endif()
+        if(NOT ("${prop_archive_cfg_outname}" STREQUAL "prop_archive_cfg_outname-NOTFOUND"))
+            set(prop_archive_outname "${prop_archive_cfg_outname}")
+        endif()
+        if(NOT ("${prop_library_cfg_outname}" STREQUAL "prop_library_cfg_outname-NOTFOUND"))
+            set(prop_library_outname "${prop_library_cfg_outname}")
+        endif()
+        if(NOT ("${prop_runtime_cfg_outname}" STREQUAL "prop_runtime_cfg_outname-NOTFOUND"))
+            set(prop_runtime_outname "${prop_runtime_cfg_outname}")
+        endif()
+    endif()
+    set(outname "${target}")
+    if(NOT ("${prop_outname}" STREQUAL "prop_outname-NOTFOUND"))
+        set(outname "${prop_outname}")
+    endif()
+    if("${prop_is_framework}")
+        set(filename "${outname}")
+    elseif(prop_type STREQUAL "STATIC_LIBRARY")
+        if(NOT ("${prop_archive_outname}" STREQUAL "prop_archive_outname-NOTFOUND"))
+            set(outname "${prop_archive_outname}")
+        endif()
+        set(filename "${CMAKE_STATIC_LIBRARY_PREFIX}${outname}${CMAKE_STATIC_LIBRARY_SUFFIX}")
+    elseif(prop_type STREQUAL "MODULE_LIBRARY")
+        if(NOT ("${prop_library_outname}" STREQUAL "prop_library_outname-NOTFOUND"))
+            set(outname "${prop_library_outname}")
+        endif()
+        set(filename "${CMAKE_SHARED_MODULE_LIBRARY_PREFIX}${outname}${CMAKE_SHARED_MODULE_LIBRARY_SUFFIX}")
+    elseif(prop_type STREQUAL "SHARED_LIBRARY")
+        if(WIN32)
+            if(NOT ("${prop_runtime_outname}" STREQUAL "prop_runtime_outname-NOTFOUND"))
+                set(outname "${prop_runtime_outname}")
+            endif()
+        else()
+            if(NOT ("${prop_library_outname}" STREQUAL "prop_library_outname-NOTFOUND"))
+                set(outname "${prop_library_outname}")
+            endif()
+        endif()
+        set(filename "${CMAKE_SHARED_LIBRARY_PREFIX}${outname}${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    elseif(prop_type STREQUAL "EXECUTABLE")
+        if(NOT ("${prop_runtime_outname}" STREQUAL "prop_runtime_outname-NOTFOUND"))
+            set(outname "${prop_runtime_outname}")
+        endif()
+        set(filename "${CMAKE_EXECUTABLE_PREFIX}${outname}${CMAKE_EXECUTABLE_SUFFIX}")
+    else()
+        message(FATAL_ERROR "target \"${target}\" is not of type STATIC_LIBRARY, MODULE_LIBRARY, SHARED_LIBRARY, or EXECUTABLE.")
+    endif()
+    set("${outvar}" "${filename}" PARENT_SCOPE)
+endfunction()
+
+
+# Function get_cmake_properties returns list of all propreties that cmake supports
+function(get_cmake_properties outvar)
+    execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties)
+    # Convert command output into a CMake list
+    string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}")
+    string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}")
+    list(REMOVE_DUPLICATES cmake_properties)
+    set("${outvar}" "${cmake_properties}" PARENT_SCOPE)
+endfunction()
+
+# Function get_target_property_list returns list of all propreties set for target
+function(get_target_property_list target outvar)
+    get_cmake_properties(cmake_property_list)
+    foreach(property ${cmake_property_list})
+        string(REPLACE "<CONFIG>" "${CMAKE_BUILD_TYPE}" property ${property})
+
+        # https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
+        if(property STREQUAL "LOCATION" OR property MATCHES "^LOCATION_" OR property MATCHES "_LOCATION$")
+            continue()
+        endif()
+
+        get_property(was_set TARGET ${target} PROPERTY ${property} SET)
+        if(was_set)
+            get_target_property(value ${target} ${property})
+            string(REGEX REPLACE ";" "\\\\\\\\;" value "${value}")
+            list(APPEND outvar "${property} = ${value}")
+        endif()
+    endforeach()
+    set(${outvar} ${${outvar}} PARENT_SCOPE)
+endfunction()
diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index 39d299e1794..75a18528e65 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -97,11 +97,9 @@ if [ -n "$MAKE_DEB" ]; then
   bash -x /build/packages/build
 fi
 
-if [ "$BUILD_TARGET" != "fuzzers" ]; then
-  mv ./programs/clickhouse* /output
-  [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
-  mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
-fi
+mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
+[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
+mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
 
 prepare_combined_output () {
     local OUTPUT

From b7a17bf8dda0b94db456d2883e507d503f400594 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Sun, 10 Sep 2023 17:07:49 +0000
Subject: [PATCH 063/243] add libFuzzer tests, initial integration

---
 .github/workflows/pull_request.yml     |  37 ++
 docker/test/libfuzzer/Dockerfile       |  42 +++
 docker/test/libfuzzer/parse_options.py |  61 ++++
 docker/test/libfuzzer/run_libfuzzer.sh | 115 +++++++
 tests/ci/build_download_helper.py      |   6 +
 tests/ci/ci_config.py                  |   1 +
 tests/ci/libfuzzer_test_check.py       | 458 +++++++++++++++++++++++++
 7 files changed, 720 insertions(+)
 create mode 100644 docker/test/libfuzzer/Dockerfile
 create mode 100644 docker/test/libfuzzer/parse_options.py
 create mode 100644 docker/test/libfuzzer/run_libfuzzer.sh
 create mode 100644 tests/ci/libfuzzer_test_check.py

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index e6a4d1bf92e..7e56254bac0 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5103,6 +5103,43 @@ jobs:
           docker ps --quiet | xargs --no-run-if-empty docker kill ||:
           docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+##############################################################################################
+################################ libFuzzer TESTS #############################################
+##############################################################################################
+  libFuzzerTest:
+    needs: [BuilderFuzzers]
+    runs-on: [self-hosted, func-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/libfuzzer
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=libFuzzer tests
+          REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v3
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+      - name: libFuzzer test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 libfuzzer_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
 #############################################################################################
 ###################################### JEPSEN TESTS #########################################
 #############################################################################################
diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile
new file mode 100644
index 00000000000..77815431314
--- /dev/null
+++ b/docker/test/libfuzzer/Dockerfile
@@ -0,0 +1,42 @@
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG
+
+# ARG for quick switch to a given ubuntu mirror
+ARG apt_archive="http://archive.ubuntu.com"
+RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
+
+ENV LANG=C.UTF-8
+ENV TZ=Europe/Amsterdam
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
+            ca-certificates \
+            libc6-dbg \
+            moreutils \
+            ncdu \
+            p7zip-full \
+            parallel \
+            psmisc \
+            python3 \
+            python3-pip \
+            rsync \
+            tree \
+            tzdata \
+            vim \
+            wget \
+    && apt-get autoremove --yes \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install Jinja2
+
+COPY * /
+
+SHELL ["/bin/bash", "-c"]
+CMD set -o pipefail \
+    && cd /workspace \
+    && timeout -s 9 1h /run_libfuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
+
+# docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer
+
diff --git a/docker/test/libfuzzer/parse_options.py b/docker/test/libfuzzer/parse_options.py
new file mode 100644
index 00000000000..5695e80a714
--- /dev/null
+++ b/docker/test/libfuzzer/parse_options.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for parsing custom fuzzing options."""
+import configparser
+import sys
+
+
+def parse_options(options_file_path, options_section):
+    """Parses the given file and returns options from the given section."""
+    parser = configparser.ConfigParser()
+    parser.read(options_file_path)
+
+    if not parser.has_section(options_section):
+        return None
+
+    options = parser[options_section]
+
+    if options_section == "libfuzzer":
+        options_string = " ".join(
+            "-%s=%s" % (key, value) for key, value in options.items()
+        )
+    else:
+        # Sanitizer options.
+        options_string = ":".join(
+            "%s=%s" % (key, value) for key, value in options.items()
+        )
+
+    return options_string
+
+
+def main():
+    """Processes the arguments and prints the options in the correct format."""
+    if len(sys.argv) < 3:
+        sys.stderr.write(
+            "Usage: %s <path_to_options_file> <options_section>\n" % sys.argv[0]
+        )
+        return 1
+
+    options = parse_options(sys.argv[1], sys.argv[2])
+    if options is not None:
+        print(options)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/docker/test/libfuzzer/run_libfuzzer.sh b/docker/test/libfuzzer/run_libfuzzer.sh
new file mode 100644
index 00000000000..49a59dafb90
--- /dev/null
+++ b/docker/test/libfuzzer/run_libfuzzer.sh
@@ -0,0 +1,115 @@
+#!/bin/bash -eu
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Fuzzer runner. Appends .options arguments and seed corpus to users args.
+# Usage: $0 <fuzzer_name> <fuzzer_args>
+
+export PATH=$OUT:$PATH
+cd $OUT
+
+DEBUGGER=${DEBUGGER:-}
+
+FUZZER=$1
+shift
+
+# This env var is set by CIFuzz. CIFuzz fills this directory with the corpus
+# from ClusterFuzz.
+CORPUS_DIR=${CORPUS_DIR:-}
+if [ -z "$CORPUS_DIR" ]
+then
+  CORPUS_DIR="/tmp/${FUZZER}_corpus"
+  rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
+fi
+
+SANITIZER=${SANITIZER:-}
+if [ -z $SANITIZER ]; then
+  # If $SANITIZER is not specified (e.g. calling from `reproduce` command), it
+  # is not important and can be set to any value.
+  SANITIZER="default"
+fi
+
+if [[ "$RUN_FUZZER_MODE" = interactive ]]; then
+  FUZZER_OUT="$OUT/${FUZZER}_${FUZZING_ENGINE}_${SANITIZER}_out"
+else
+  FUZZER_OUT="/tmp/${FUZZER}_${FUZZING_ENGINE}_${SANITIZER}_out"
+fi
+
+
+rm -rf $FUZZER_OUT && mkdir -p $FUZZER_OUT
+
+SEED_CORPUS="${FUZZER}_seed_corpus.zip"
+
+# TODO: Investigate why this code block is skipped
+# by all default fuzzers in bad_build_check.
+# They all set SKIP_SEED_CORPUS=1.
+if [ -f $SEED_CORPUS ] && [ -z ${SKIP_SEED_CORPUS:-} ]; then
+  echo "Using seed corpus: $SEED_CORPUS"
+  unzip -o -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
+fi
+
+OPTIONS_FILE="${FUZZER}.options"
+CUSTOM_LIBFUZZER_OPTIONS=""
+
+if [ -f $OPTIONS_FILE ]; then
+  custom_asan_options=$(parse_options.py $OPTIONS_FILE asan)
+  if [ ! -z $custom_asan_options ]; then
+    export ASAN_OPTIONS="$ASAN_OPTIONS:$custom_asan_options"
+  fi
+
+  custom_msan_options=$(parse_options.py $OPTIONS_FILE msan)
+  if [ ! -z $custom_msan_options ]; then
+    export MSAN_OPTIONS="$MSAN_OPTIONS:$custom_msan_options"
+  fi
+
+  custom_ubsan_options=$(parse_options.py $OPTIONS_FILE ubsan)
+  if [ ! -z $custom_ubsan_options ]; then
+    export UBSAN_OPTIONS="$UBSAN_OPTIONS:$custom_ubsan_options"
+  fi
+
+  CUSTOM_LIBFUZZER_OPTIONS=$(parse_options.py $OPTIONS_FILE libfuzzer)
+fi
+
+
+
+CMD_LINE="$OUT/$FUZZER $FUZZER_ARGS $*"
+
+if [ -z ${SKIP_SEED_CORPUS:-} ]; then
+CMD_LINE="$CMD_LINE $CORPUS_DIR"
+fi
+
+if [[ ! -z ${CUSTOM_LIBFUZZER_OPTIONS} ]]; then
+CMD_LINE="$CMD_LINE $CUSTOM_LIBFUZZER_OPTIONS"
+fi
+
+if [[ ! "$CMD_LINE" =~ "-dict=" ]]; then
+if [ -f "$FUZZER.dict" ]; then
+    CMD_LINE="$CMD_LINE -dict=$FUZZER.dict"
+fi
+fi
+
+CMD_LINE="$CMD_LINE < /dev/null"
+
+echo $CMD_LINE
+
+# Unset OUT so the fuzz target can't rely on it.
+unset OUT
+
+if [ ! -z "$DEBUGGER" ]; then
+  CMD_LINE="$DEBUGGER $CMD_LINE"
+fi
+
+bash -c "$CMD_LINE"
diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py
index a6fda749494..02e22e88a96 100644
--- a/tests/ci/build_download_helper.py
+++ b/tests/ci/build_download_helper.py
@@ -210,3 +210,9 @@ def download_performance_build(check_name, reports_path, result_path):
         result_path,
         lambda x: x.endswith("performance.tar.zst"),
     )
+
+
+def download_fuzzers(check_name, reports_path, result_path):
+    download_builds_filter(
+        check_name, reports_path, result_path, lambda x: x.endswith("_fuzzer")
+    )
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index b9ccc23cb2e..198395eca27 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -282,6 +282,7 @@ CI_CONFIG = CiConfig(
         "SQLancer (debug)": TestConfig("package_debug"),
         "Sqllogic test (release)": TestConfig("package_release"),
         "SQLTest": TestConfig("package_release"),
+        "libFuzzer tests": TestConfig("fuzzers"),
     },
 )
 CI_CONFIG.validate()
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
new file mode 100644
index 00000000000..148b6e6d1e4
--- /dev/null
+++ b/tests/ci/libfuzzer_test_check.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+import logging
+import os
+import re
+import subprocess
+import sys
+import atexit
+from pathlib import Path
+from typing import List, Tuple
+
+from github import Github
+
+# from build_download_helper import download_all_deb_packages
+from build_download_helper import download_fuzzers
+from clickhouse_helper import (
+    CiLogsCredentials,
+    # ClickHouseHelper,
+    # prepare_tests_results_for_clickhouse,
+)
+from commit_status_helper import (
+    # NotSet,
+    RerunHelper,
+    get_commit,
+    # override_status,
+    # post_commit_status,
+    # post_commit_status_to_file,
+    update_mergeable_check,
+)
+from docker_pull_helper import DockerImage  # , get_image_with_version
+
+# from download_release_packages import download_last_release
+from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
+from get_robot_token import get_best_robot_token
+from pr_info import PRInfo  # , FORCE_TESTS_LABEL
+from report import TestResults, read_test_results
+
+# from s3_helper import S3Helper
+from stopwatch import Stopwatch
+
+# from tee_popen import TeePopen
+# from upload_result_helper import upload_results
+
+NO_CHANGES_MSG = "Nothing to run"
+
+
+def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
+    result = []
+    if "DatabaseReplicated" in check_name:
+        result.append("USE_DATABASE_REPLICATED=1")
+    if "DatabaseOrdinary" in check_name:
+        result.append("USE_DATABASE_ORDINARY=1")
+    if "wide parts enabled" in check_name:
+        result.append("USE_POLYMORPHIC_PARTS=1")
+    if "ParallelReplicas" in check_name:
+        result.append("USE_PARALLEL_REPLICAS=1")
+    if "s3 storage" in check_name:
+        result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1")
+    if "analyzer" in check_name:
+        result.append("USE_NEW_ANALYZER=1")
+
+    if run_by_hash_total != 0:
+        result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}")
+        result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}")
+
+    return result
+
+
+# def get_image_name(check_name):
+#     if "stateless" in check_name.lower():
+#         return "clickhouse/stateless-test"
+#     if "stateful" in check_name.lower():
+#         return "clickhouse/stateful-test"
+#     else:
+#         raise Exception(f"Cannot deduce image name based on check name {check_name}")
+
+
+def get_run_command(
+    #    check_name: str,
+    fuzzers_path: str,
+    repo_path: str,
+    result_path: str,
+    #    server_log_path: str,
+    kill_timeout: int,
+    additional_envs: List[str],
+    ci_logs_args: str,
+    image: DockerImage,
+    #    flaky_check: bool,
+    #    tests_to_run: List[str],
+) -> str:
+    additional_options = ["--hung-check"]
+    additional_options.append("--print-time")
+
+    # if tests_to_run:
+    #     additional_options += tests_to_run
+
+    additional_options_str = (
+        '-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
+    )
+
+    envs = [
+        f"-e MAX_RUN_TIME={int(0.9 * kill_timeout)}",
+        # a static link, don't use S3_URL or S3_DOWNLOAD
+        '-e S3_URL="https://s3.amazonaws.com/clickhouse-datasets"',
+    ]
+
+    # if flaky_check:
+    #     envs.append("-e NUM_TRIES=100")
+    #     envs.append("-e MAX_RUN_TIME=1800")
+
+    envs += [f"-e {e}" for e in additional_envs]
+
+    env_str = " ".join(envs)
+    # volume_with_broken_test = (
+    #     f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt "
+    #     if "analyzer" in check_name
+    #     else ""
+    # )
+
+    return (
+        f"docker run --volume={fuzzers_path}:/fuzzers "
+        f"{ci_logs_args}"
+        f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
+        #        f"{volume_with_broken_test}"
+        f"--volume={result_path}:/test_output "
+        #        f"--volume={server_log_path}:/var/log/clickhouse-server "
+        f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
+    )
+
+
+def get_tests_to_run(pr_info: PRInfo) -> List[str]:
+    result = set()
+
+    if pr_info.changed_files is None:
+        return []
+
+    for fpath in pr_info.changed_files:
+        if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath):
+            logging.info("File '%s' is changed and seems like a test", fpath)
+            fname = fpath.split("/")[3]
+            fname_without_ext = os.path.splitext(fname)[0]
+            # add '.' to the end of the test name not to run all tests with the same prefix
+            # e.g. we changed '00001_some_name.reference'
+            # and we have ['00001_some_name.sh', '00001_some_name_2.sql']
+            # so we want to run only '00001_some_name.sh'
+            result.add(fname_without_ext + ".")
+        elif "tests/queries/" in fpath:
+            # log suspicious changes from tests/ for debugging in case of any problems
+            logging.info("File '%s' is changed, but it doesn't look like a test", fpath)
+    return list(result)
+
+
+def process_results(
+    result_folder: str,
+    server_log_path: str,
+) -> Tuple[str, str, TestResults, List[str]]:
+    test_results = []  # type: TestResults
+    additional_files = []
+    # Just upload all files from result_folder.
+    # If task provides processed results, then it's responsible for content of result_folder.
+    if os.path.exists(result_folder):
+        test_files = [
+            f
+            for f in os.listdir(result_folder)
+            if os.path.isfile(os.path.join(result_folder, f))
+        ]
+        additional_files = [os.path.join(result_folder, f) for f in test_files]
+
+    if os.path.exists(server_log_path):
+        server_log_files = [
+            f
+            for f in os.listdir(server_log_path)
+            if os.path.isfile(os.path.join(server_log_path, f))
+        ]
+        additional_files = additional_files + [
+            os.path.join(server_log_path, f) for f in server_log_files
+        ]
+
+    status = []
+    status_path = os.path.join(result_folder, "check_status.tsv")
+    if os.path.exists(status_path):
+        logging.info("Found test_results.tsv")
+        with open(status_path, "r", encoding="utf-8") as status_file:
+            status = list(csv.reader(status_file, delimiter="\t"))
+
+    if len(status) != 1 or len(status[0]) != 2:
+        logging.info("Files in result folder %s", os.listdir(result_folder))
+        return "error", "Invalid check_status.tsv", test_results, additional_files
+    state, description = status[0][0], status[0][1]
+
+    try:
+        results_path = Path(result_folder) / "test_results.tsv"
+
+        if results_path.exists():
+            logging.info("Found test_results.tsv")
+        else:
+            logging.info("Files in result folder %s", os.listdir(result_folder))
+            return "error", "Not found test_results.tsv", test_results, additional_files
+
+        test_results = read_test_results(results_path)
+        if len(test_results) == 0:
+            return "error", "Empty test_results.tsv", test_results, additional_files
+    except Exception as e:
+        return (
+            "error",
+            f"Cannot parse test_results.tsv ({e})",
+            test_results,
+            additional_files,
+        )
+
+    return state, description, test_results, additional_files
+
+
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("check_name")
+    parser.add_argument("kill_timeout", type=int)
+    parser.add_argument(
+        "--validate-bugfix",
+        action="store_true",
+        help="Check that added tests failed on latest stable",
+    )
+    parser.add_argument(
+        "--post-commit-status",
+        default="commit_status",
+        choices=["commit_status", "file"],
+        help="Where to public post commit status",
+    )
+    return parser.parse_args()
+
+
+def docker_build_image(image_name: str, filepath: Path) -> DockerImage:
+    # context = filepath.parent
+    docker_image = DockerImage(image_name)
+    build_cmd = f"docker build --network=host -t {image_name} {filepath}"
+    logging.info("Will build image with cmd: '%s'", build_cmd)
+    subprocess.check_call(
+        build_cmd,
+        shell=True,
+    )
+    return docker_image
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+
+    stopwatch = Stopwatch()
+
+    temp_path = TEMP_PATH
+    repo_path = REPO_COPY
+    reports_path = REPORTS_PATH
+    # post_commit_path = os.path.join(temp_path, "functional_commit_status.tsv")
+
+    args = parse_args()
+    check_name = args.check_name
+    kill_timeout = args.kill_timeout
+    validate_bugfix_check = args.validate_bugfix
+
+    # flaky_check = "flaky" in check_name.lower()
+
+    # run_changed_tests = flaky_check or validate_bugfix_check
+    run_changed_tests = validate_bugfix_check
+    gh = Github(get_best_robot_token(), per_page=100)
+
+    # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used
+    pr_info = PRInfo(
+        need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check
+    )
+
+    commit = get_commit(gh, pr_info.sha)
+    atexit.register(update_mergeable_check, gh, pr_info, check_name)
+
+    if not os.path.exists(temp_path):
+        os.makedirs(temp_path)
+
+    # if validate_bugfix_check and "pr-bugfix" not in pr_info.labels:
+    #     if args.post_commit_status == "file":
+    #         post_commit_status_to_file(
+    #             post_commit_path,
+    #             f"Skipped (no pr-bugfix in {pr_info.labels})",
+    #             "success",
+    #             "null",
+    #         )
+    #     logging.info("Skipping '%s' (no pr-bugfix in %s)", check_name, pr_info.labels)
+    #     sys.exit(0)
+
+    if "RUN_BY_HASH_NUM" in os.environ:
+        run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
+        run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0"))
+        check_name_with_group = (
+            check_name + f" [{run_by_hash_num + 1}/{run_by_hash_total}]"
+        )
+    else:
+        run_by_hash_num = 0
+        run_by_hash_total = 0
+        check_name_with_group = check_name
+
+    rerun_helper = RerunHelper(commit, check_name_with_group)
+    if rerun_helper.is_already_finished_by_status():
+        logging.info("Check is already finished according to github status, exiting")
+        sys.exit(0)
+
+    # tests_to_run = []
+    # if run_changed_tests:
+    #     tests_to_run = get_tests_to_run(pr_info)
+    #     if not tests_to_run:
+    #         state = override_status("success", check_name, validate_bugfix_check)
+    #         if args.post_commit_status == "commit_status":
+    #             post_commit_status(
+    #                 commit,
+    #                 state,
+    #                 NotSet,
+    #                 NO_CHANGES_MSG,
+    #                 check_name_with_group,
+    #                 pr_info,
+    #             )
+    #         elif args.post_commit_status == "file":
+    #             post_commit_status_to_file(
+    #                 post_commit_path,
+    #                 description=NO_CHANGES_MSG,
+    #                 state=state,
+    #                 report_url="null",
+    #             )
+    #         sys.exit(0)
+
+    image_name = "clickhouse/libfuzzer-test"  # get_image_name(check_name)
+    docker_image = docker_build_image(
+        image_name, Path("../../docker/test/libfuzzer/")
+    )  # get_image_with_version(reports_path, image_name)
+
+    fuzzers_tmp_path = os.path.join(temp_path, "fuzzers_tmp")
+    if not os.path.exists(fuzzers_tmp_path):
+        os.makedirs(fuzzers_tmp_path)
+
+    # if validate_bugfix_check:
+    #     download_last_release(packages_path)
+    # else:
+    #     download_all_deb_packages(check_name, reports_path, packages_path)
+    download_fuzzers(check_name, reports_path, fuzzers_tmp_path)
+
+    fuzzers_path = os.path.join(temp_path, "fuzzers")
+    for fuzzer in os.listdir(fuzzers_tmp_path):
+        fuzzer_path = os.path.join(fuzzers_path, fuzzer)
+        os.makedirs(fuzzer_path)
+        os.rename(
+            os.path.join(fuzzers_tmp_path, fuzzer), os.path.join(fuzzer_path, fuzzer)
+        )
+
+    os.rmdir(fuzzers_tmp_path)
+
+    # server_log_path = os.path.join(temp_path, "server_log")
+    # if not os.path.exists(server_log_path):
+    #     os.makedirs(server_log_path)
+
+    result_path = os.path.join(temp_path, "result_path")
+    if not os.path.exists(result_path):
+        os.makedirs(result_path)
+
+    # run_log_path = os.path.join(result_path, "run.log")
+
+    additional_envs = get_additional_envs(
+        check_name, run_by_hash_num, run_by_hash_total
+    )
+    # if validate_bugfix_check:
+    #     additional_envs.append("GLOBAL_TAGS=no-random-settings")
+
+    ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh")
+    ci_logs_args = ci_logs_credentials.get_docker_arguments(
+        pr_info, stopwatch.start_time_str, check_name
+    )
+
+    run_command = get_run_command(
+        #        check_name,
+        fuzzers_path,
+        repo_path,
+        result_path,
+        #        server_log_path,
+        kill_timeout,
+        additional_envs,
+        ci_logs_args,
+        docker_image,
+        #        flaky_check,
+        #        tests_to_run,
+    )
+    logging.info("Going to run func tests: %s", run_command)
+
+    sys.exit(0)
+
+    # with TeePopen(run_command, run_log_path) as process:
+    #     retcode = process.wait()
+    #     if retcode == 0:
+    #         logging.info("Run successfully")
+    #     else:
+    #         logging.info("Run failed")
+
+    # try:
+    #     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+    # except subprocess.CalledProcessError:
+    #     logging.warning("Failed to change files owner in %s, ignoring it", temp_path)
+
+    # ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path))
+    # s3_helper = S3Helper()
+
+    # state, description, test_results, additional_logs = process_results(
+    #     result_path, server_log_path
+    # )
+    # state = override_status(state, check_name, invert=validate_bugfix_check)
+
+    # ch_helper = ClickHouseHelper()
+
+    # report_url = upload_results(
+    #     s3_helper,
+    #     pr_info.number,
+    #     pr_info.sha,
+    #     test_results,
+    #     [run_log_path] + additional_logs,
+    #     check_name_with_group,
+    # )
+
+    # print(f"::notice:: {check_name} Report url: {report_url}")
+    # if args.post_commit_status == "commit_status":
+    #     post_commit_status(
+    #         commit, state, report_url, description, check_name_with_group, pr_info
+    #     )
+    # elif args.post_commit_status == "file":
+    #     post_commit_status_to_file(
+    #         post_commit_path,
+    #         description,
+    #         state,
+    #         report_url,
+    #     )
+    # else:
+    #     raise Exception(
+    #         f'Unknown post_commit_status option "{args.post_commit_status}"'
+    #     )
+
+    # prepared_events = prepare_tests_results_for_clickhouse(
+    #     pr_info,
+    #     test_results,
+    #     state,
+    #     stopwatch.duration_seconds,
+    #     stopwatch.start_time_str,
+    #     report_url,
+    #     check_name_with_group,
+    # )
+    # ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
+
+    # if state != "success":
+    #     if FORCE_TESTS_LABEL in pr_info.labels:
+    #         print(f"'{FORCE_TESTS_LABEL}' enabled, will report success")
+    #     else:
+    #         sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

From 0847889db6e56f50d70e167915a69d4118b778b8 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Mon, 11 Sep 2023 19:06:00 +0000
Subject: [PATCH 064/243] libFuzzer infrastructure

---
 CMakeLists.txt                   |  1 +
 docker/packager/binary/build.sh  |  1 +
 tests/ci/libfuzzer_test_check.py | 18 ++++--------------
 tests/fuzz/build.sh              | 28 ++++++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 14 deletions(-)
 create mode 100755 tests/fuzz/build.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 781a9efe64a..b4e13e8ab5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -588,6 +588,7 @@ if (FUZZER)
             endif()
         endif()
     endforeach()
+    add_custom_command(TARGET fuzzers POST_BUILD COMMAND SRC=${CMAKE_SOURCE_DIR} BIN=${CMAKE_BINARY_DIR} OUT=${CMAKE_BINARY_DIR}/programs ${CMAKE_SOURCE_DIR}/tests/fuzz/build.sh VERBATIM)
 endif()
 
 include (cmake/sanitize_targets.cmake)
diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index 75a18528e65..11efffd592c 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -100,6 +100,7 @@ fi
 mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
 [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
 mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds
+mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure
 
 prepare_combined_output () {
     local OUTPUT
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 148b6e6d1e4..75af6ddf5d9 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -330,25 +330,15 @@ def main():
         image_name, Path("../../docker/test/libfuzzer/")
     )  # get_image_with_version(reports_path, image_name)
 
-    fuzzers_tmp_path = os.path.join(temp_path, "fuzzers_tmp")
-    if not os.path.exists(fuzzers_tmp_path):
-        os.makedirs(fuzzers_tmp_path)
+    fuzzers_path = os.path.join(temp_path, "fuzzers")
+    if not os.path.exists(fuzzers_path):
+        os.makedirs(fuzzers_path)
 
     # if validate_bugfix_check:
     #     download_last_release(packages_path)
     # else:
     #     download_all_deb_packages(check_name, reports_path, packages_path)
-    download_fuzzers(check_name, reports_path, fuzzers_tmp_path)
-
-    fuzzers_path = os.path.join(temp_path, "fuzzers")
-    for fuzzer in os.listdir(fuzzers_tmp_path):
-        fuzzer_path = os.path.join(fuzzers_path, fuzzer)
-        os.makedirs(fuzzer_path)
-        os.rename(
-            os.path.join(fuzzers_tmp_path, fuzzer), os.path.join(fuzzer_path, fuzzer)
-        )
-
-    os.rmdir(fuzzers_tmp_path)
+    download_fuzzers(check_name, reports_path, fuzzers_path)
 
     # server_log_path = os.path.join(temp_path, "server_log")
     # if not os.path.exists(server_log_path):
diff --git a/tests/fuzz/build.sh b/tests/fuzz/build.sh
new file mode 100755
index 00000000000..12f41f6e079
--- /dev/null
+++ b/tests/fuzz/build.sh
@@ -0,0 +1,28 @@
+#!/bin/bash -eu
+
+# copy fuzzer options and dictionaries
+cp $SRC/tests/fuzz/*.dict $OUT/
+cp $SRC/tests/fuzz/*.options $OUT/
+
+# prepare corpus dirs
+mkdir -p $BIN/tests/fuzz/lexer_fuzzer.in/
+mkdir -p $BIN/tests/fuzz/select_parser_fuzzer.in/
+mkdir -p $BIN/tests/fuzz/create_parser_fuzzer.in/
+mkdir -p $BIN/tests/fuzz/execute_query_fuzzer.in/
+
+# prepare corpus
+cp $SRC/tests/queries/0_stateless/*.sql $BIN/tests/fuzz/lexer_fuzzer.in/
+cp $SRC/tests/queries/0_stateless/*.sql $BIN/tests/fuzz/select_parser_fuzzer.in/
+cp $SRC/tests/queries/0_stateless/*.sql $BIN/tests/fuzz/create_parser_fuzzer.in/
+cp $SRC/tests/queries/0_stateless/*.sql $BIN/tests/fuzz/execute_query_fuzzer.in/
+cp $SRC/tests/queries/1_stateful/*.sql $BIN/tests/fuzz/lexer_fuzzer.in/
+cp $SRC/tests/queries/1_stateful/*.sql $BIN/tests/fuzz/select_parser_fuzzer.in/
+cp $SRC/tests/queries/1_stateful/*.sql $BIN/tests/fuzz/create_parser_fuzzer.in/
+cp $SRC/tests/queries/1_stateful/*.sql $BIN/tests/fuzz/execute_query_fuzzer.in/
+
+# build corpus archives
+cd $BIN/tests/fuzz
+for dir in *_fuzzer.in; do
+    fuzzer=$(basename $dir .in)
+    zip -rj "$OUT/${fuzzer}_seed_corpus.zip" "${dir}/"
+done

From eb3a7caa74748a5c9f97f3d740cefb16f9a4bae2 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Mon, 11 Sep 2023 21:10:03 +0000
Subject: [PATCH 065/243] add zip to build docker

---
 docker/packager/binary/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 940daad9c61..12818335807 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -74,6 +74,7 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
         python3-boto3 \
         yasm \
         zstd \
+        zip \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists
 

From 44546458f09b913eb511eb1c332f06d0fad48a46 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Mon, 11 Sep 2023 22:45:50 +0000
Subject: [PATCH 066/243] add infrastructure files to the download filter

---
 tests/ci/build_download_helper.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py
index 02e22e88a96..e27d10cbe5b 100644
--- a/tests/ci/build_download_helper.py
+++ b/tests/ci/build_download_helper.py
@@ -214,5 +214,8 @@ def download_performance_build(check_name, reports_path, result_path):
 
 def download_fuzzers(check_name, reports_path, result_path):
     download_builds_filter(
-        check_name, reports_path, result_path, lambda x: x.endswith("_fuzzer")
+        check_name,
+        reports_path,
+        result_path,
+        lambda x: x.endswith(("_fuzzer", ".dict", ".options", "_seed_corpus.zip")),
     )

From 1ad0a77c9f30146289bc640c1ba79de6db0d745e Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Tue, 12 Sep 2023 16:30:52 +0000
Subject: [PATCH 067/243] unzip corpora

---
 tests/ci/libfuzzer_test_check.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 75af6ddf5d9..02f5c184b54 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -8,6 +8,7 @@ import re
 import subprocess
 import sys
 import atexit
+import zipfile
 from pathlib import Path
 from typing import List, Tuple
 
@@ -334,12 +335,15 @@ def main():
     if not os.path.exists(fuzzers_path):
         os.makedirs(fuzzers_path)
 
-    # if validate_bugfix_check:
-    #     download_last_release(packages_path)
-    # else:
-    #     download_all_deb_packages(check_name, reports_path, packages_path)
     download_fuzzers(check_name, reports_path, fuzzers_path)
 
+    for file in os.listdir(fuzzers_path):
+        if file.endswith("_seed_corpus.zip"):
+            corpus_path = os.path.join(
+                temp_path, file.removesuffix("_seed_corpus.zip") + ".in"
+            )
+            zipfile.ZipFile(os.path.join(temp_path, file), "r").extractall(corpus_path)
+
     # server_log_path = os.path.join(temp_path, "server_log")
     # if not os.path.exists(server_log_path):
     #     os.makedirs(server_log_path)
@@ -374,7 +378,7 @@ def main():
         #        flaky_check,
         #        tests_to_run,
     )
-    logging.info("Going to run func tests: %s", run_command)
+    logging.info("Going to run libFuzzer tests: %s", run_command)
 
     sys.exit(0)
 

From d80ae880606d2f40dae4dd9eb085a3016311a137 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Tue, 12 Sep 2023 20:56:42 +0000
Subject: [PATCH 068/243] run docker

---
 docker/test/libfuzzer/Dockerfile       |   3 +-
 docker/test/libfuzzer/parse_options.py |   0
 docker/test/libfuzzer/run_libfuzzer.sh | 142 +++++++++----------------
 tests/ci/libfuzzer_test_check.py       |  37 ++++---
 4 files changed, 77 insertions(+), 105 deletions(-)
 mode change 100644 => 100755 docker/test/libfuzzer/parse_options.py
 mode change 100644 => 100755 docker/test/libfuzzer/run_libfuzzer.sh

diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile
index 77815431314..65cd8e4831f 100644
--- a/docker/test/libfuzzer/Dockerfile
+++ b/docker/test/libfuzzer/Dockerfile
@@ -33,9 +33,10 @@ RUN pip3 install Jinja2
 
 COPY * /
 
+ENV FUZZER_ARGS="-max_total_time=60"
+
 SHELL ["/bin/bash", "-c"]
 CMD set -o pipefail \
-    && cd /workspace \
     && timeout -s 9 1h /run_libfuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
 
 # docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer
diff --git a/docker/test/libfuzzer/parse_options.py b/docker/test/libfuzzer/parse_options.py
old mode 100644
new mode 100755
diff --git a/docker/test/libfuzzer/run_libfuzzer.sh b/docker/test/libfuzzer/run_libfuzzer.sh
old mode 100644
new mode 100755
index 49a59dafb90..b60e942f02a
--- a/docker/test/libfuzzer/run_libfuzzer.sh
+++ b/docker/test/libfuzzer/run_libfuzzer.sh
@@ -1,115 +1,77 @@
 #!/bin/bash -eu
-# Copyright 2016 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-################################################################################
 
 # Fuzzer runner. Appends .options arguments and seed corpus to users args.
 # Usage: $0 <fuzzer_name> <fuzzer_args>
 
-export PATH=$OUT:$PATH
-cd $OUT
+# export PATH=$OUT:$PATH
+# cd $OUT
 
 DEBUGGER=${DEBUGGER:-}
+FUZZER_ARGS=${FUZZER_ARGS:-}
 
-FUZZER=$1
-shift
+function run_fuzzer() {
+    FUZZER=$1
 
-# This env var is set by CIFuzz. CIFuzz fills this directory with the corpus
-# from ClusterFuzz.
-CORPUS_DIR=${CORPUS_DIR:-}
-if [ -z "$CORPUS_DIR" ]
-then
-  CORPUS_DIR="/tmp/${FUZZER}_corpus"
-  rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
-fi
+    echo Running fuzzer "$FUZZER"
 
-SANITIZER=${SANITIZER:-}
-if [ -z $SANITIZER ]; then
-  # If $SANITIZER is not specified (e.g. calling from `reproduce` command), it
-  # is not important and can be set to any value.
-  SANITIZER="default"
-fi
+    CORPUS_DIR=""
+    if [ -d "${FUZZER}.in" ]; then
+        CORPUS_DIR="${FUZZER}.in"
+    fi
 
-if [[ "$RUN_FUZZER_MODE" = interactive ]]; then
-  FUZZER_OUT="$OUT/${FUZZER}_${FUZZING_ENGINE}_${SANITIZER}_out"
-else
-  FUZZER_OUT="/tmp/${FUZZER}_${FUZZING_ENGINE}_${SANITIZER}_out"
-fi
+    OPTIONS_FILE="${FUZZER}.options"
+    CUSTOM_LIBFUZZER_OPTIONS=""
 
+    if [ -f "$OPTIONS_FILE" ]; then
+        custom_asan_options=$(/parse_options.py "$OPTIONS_FILE" asan)
+        if [ -n "$custom_asan_options" ]; then
+            export ASAN_OPTIONS="$ASAN_OPTIONS:$custom_asan_options"
+        fi
 
-rm -rf $FUZZER_OUT && mkdir -p $FUZZER_OUT
+        custom_msan_options=$(/parse_options.py "$OPTIONS_FILE" msan)
+        if [ -n "$custom_msan_options" ]; then
+            export MSAN_OPTIONS="$MSAN_OPTIONS:$custom_msan_options"
+        fi
 
-SEED_CORPUS="${FUZZER}_seed_corpus.zip"
+        custom_ubsan_options=$(/parse_options.py "$OPTIONS_FILE" ubsan)
+        if [ -n "$custom_ubsan_options" ]; then
+            export UBSAN_OPTIONS="$UBSAN_OPTIONS:$custom_ubsan_options"
+        fi
 
-# TODO: Investigate why this code block is skipped
-# by all default fuzzers in bad_build_check.
-# They all set SKIP_SEED_CORPUS=1.
-if [ -f $SEED_CORPUS ] && [ -z ${SKIP_SEED_CORPUS:-} ]; then
-  echo "Using seed corpus: $SEED_CORPUS"
-  unzip -o -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
-fi
+        CUSTOM_LIBFUZZER_OPTIONS=$(/parse_options.py "$OPTIONS_FILE" libfuzzer)
+    fi
 
-OPTIONS_FILE="${FUZZER}.options"
-CUSTOM_LIBFUZZER_OPTIONS=""
+    CMD_LINE="./$FUZZER $FUZZER_ARGS"
+    CMD_LINE="$CMD_LINE $CORPUS_DIR"
 
-if [ -f $OPTIONS_FILE ]; then
-  custom_asan_options=$(parse_options.py $OPTIONS_FILE asan)
-  if [ ! -z $custom_asan_options ]; then
-    export ASAN_OPTIONS="$ASAN_OPTIONS:$custom_asan_options"
-  fi
+    if [[ -n "$CUSTOM_LIBFUZZER_OPTIONS" ]]; then
+        CMD_LINE="$CMD_LINE $CUSTOM_LIBFUZZER_OPTIONS"
+    fi
 
-  custom_msan_options=$(parse_options.py $OPTIONS_FILE msan)
-  if [ ! -z $custom_msan_options ]; then
-    export MSAN_OPTIONS="$MSAN_OPTIONS:$custom_msan_options"
-  fi
+    if [[ ! "$CMD_LINE" =~ "-dict=" ]]; then
+        if [ -f "$FUZZER.dict" ]; then
+            CMD_LINE="$CMD_LINE -dict=$FUZZER.dict"
+        fi
+    fi
 
-  custom_ubsan_options=$(parse_options.py $OPTIONS_FILE ubsan)
-  if [ ! -z $custom_ubsan_options ]; then
-    export UBSAN_OPTIONS="$UBSAN_OPTIONS:$custom_ubsan_options"
-  fi
+    CMD_LINE="$CMD_LINE < /dev/null"
 
-  CUSTOM_LIBFUZZER_OPTIONS=$(parse_options.py $OPTIONS_FILE libfuzzer)
-fi
+    echo "$CMD_LINE"
 
+    # Unset OUT so the fuzz target can't rely on it.
+    # unset OUT
 
+    if [ -n "$DEBUGGER" ]; then
+        CMD_LINE="$DEBUGGER $CMD_LINE"
+    fi
 
-CMD_LINE="$OUT/$FUZZER $FUZZER_ARGS $*"
+    bash -c "$CMD_LINE"
+}
 
-if [ -z ${SKIP_SEED_CORPUS:-} ]; then
-CMD_LINE="$CMD_LINE $CORPUS_DIR"
-fi
+ls -al
 
-if [[ ! -z ${CUSTOM_LIBFUZZER_OPTIONS} ]]; then
-CMD_LINE="$CMD_LINE $CUSTOM_LIBFUZZER_OPTIONS"
-fi
-
-if [[ ! "$CMD_LINE" =~ "-dict=" ]]; then
-if [ -f "$FUZZER.dict" ]; then
-    CMD_LINE="$CMD_LINE -dict=$FUZZER.dict"
-fi
-fi
-
-CMD_LINE="$CMD_LINE < /dev/null"
-
-echo $CMD_LINE
-
-# Unset OUT so the fuzz target can't rely on it.
-unset OUT
-
-if [ ! -z "$DEBUGGER" ]; then
-  CMD_LINE="$DEBUGGER $CMD_LINE"
-fi
-
-bash -c "$CMD_LINE"
+for fuzzer in *_fuzzer; do
+    if [ -f "$fuzzer" ] && [ -x "$fuzzer" ]; then
+        run_fuzzer "$fuzzer"
+    fi
+done
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 02f5c184b54..e7f907d02d4 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -41,7 +41,8 @@ from report import TestResults, read_test_results
 # from s3_helper import S3Helper
 from stopwatch import Stopwatch
 
-# from tee_popen import TeePopen
+from tee_popen import TeePopen
+
 # from upload_result_helper import upload_results
 
 NO_CHANGES_MSG = "Nothing to run"
@@ -121,8 +122,10 @@ def get_run_command(
     # )
 
     return (
-        f"docker run --volume={fuzzers_path}:/fuzzers "
-        f"{ci_logs_args}"
+        f"docker run "
+        f"{ci_logs_args} "
+        f"--workdir=/fuzzers "
+        f"--volume={fuzzers_path}:/fuzzers "
         f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
         #        f"{volume_with_broken_test}"
         f"--volume={result_path}:/test_output "
@@ -338,11 +341,15 @@ def main():
     download_fuzzers(check_name, reports_path, fuzzers_path)
 
     for file in os.listdir(fuzzers_path):
-        if file.endswith("_seed_corpus.zip"):
+        if file.endswith("_fuzzer"):
+            os.chmod(os.path.join(fuzzers_path, file), 0o777)
+        elif file.endswith("_seed_corpus.zip"):
             corpus_path = os.path.join(
-                temp_path, file.removesuffix("_seed_corpus.zip") + ".in"
+                fuzzers_path, file.removesuffix("_seed_corpus.zip") + ".in"
+            )
+            zipfile.ZipFile(os.path.join(fuzzers_path, file), "r").extractall(
+                corpus_path
             )
-            zipfile.ZipFile(os.path.join(temp_path, file), "r").extractall(corpus_path)
 
     # server_log_path = os.path.join(temp_path, "server_log")
     # if not os.path.exists(server_log_path):
@@ -352,7 +359,7 @@ def main():
     if not os.path.exists(result_path):
         os.makedirs(result_path)
 
-    # run_log_path = os.path.join(result_path, "run.log")
+    run_log_path = os.path.join(result_path, "run.log")
 
     additional_envs = get_additional_envs(
         check_name, run_by_hash_num, run_by_hash_total
@@ -380,14 +387,16 @@ def main():
     )
     logging.info("Going to run libFuzzer tests: %s", run_command)
 
-    sys.exit(0)
+    # sys.exit(0)
 
-    # with TeePopen(run_command, run_log_path) as process:
-    #     retcode = process.wait()
-    #     if retcode == 0:
-    #         logging.info("Run successfully")
-    #     else:
-    #         logging.info("Run failed")
+    with TeePopen(run_command, run_log_path) as process:
+        retcode = process.wait()
+        if retcode == 0:
+            logging.info("Run successfully")
+        else:
+            logging.info("Run failed")
+
+    sys.exit(0)
 
     # try:
     #     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)

From cd0c775355e2eb2b620a638a1d2ce3c6f83c7f1c Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Thu, 14 Sep 2023 03:10:55 +0000
Subject: [PATCH 069/243] review suggestions

---
 docker/images.json                     |  4 ++
 docker/test/libfuzzer/Dockerfile       |  2 +-
 docker/test/libfuzzer/run_libfuzzer.py | 73 ++++++++++++++++++++++++++
 tests/ci/libfuzzer_test_check.py       | 11 ++--
 4 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100755 docker/test/libfuzzer/run_libfuzzer.py

diff --git a/docker/images.json b/docker/images.json
index d895e2da2f0..bddfd49ea3b 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -21,6 +21,10 @@
         "name": "clickhouse/fuzzer",
         "dependent": []
     },
+    "docker/test/libfuzzer": {
+        "name": "clickhouse/libfuzzer",
+        "dependent": []
+    },
     "docker/test/performance-comparison": {
         "name": "clickhouse/performance-comparison",
         "dependent": []
diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile
index 65cd8e4831f..081cf5473f8 100644
--- a/docker/test/libfuzzer/Dockerfile
+++ b/docker/test/libfuzzer/Dockerfile
@@ -37,7 +37,7 @@ ENV FUZZER_ARGS="-max_total_time=60"
 
 SHELL ["/bin/bash", "-c"]
 CMD set -o pipefail \
-    && timeout -s 9 1h /run_libfuzzer.sh 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
+    && timeout -s 9 1h /run_libfuzzer.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log
 
 # docker run --network=host --volume <workspace>:/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer
 
diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py
new file mode 100755
index 00000000000..b608c97de60
--- /dev/null
+++ b/docker/test/libfuzzer/run_libfuzzer.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+import logging
+import os
+from pathlib import Path
+import subprocess
+from parse_options import parse_options
+
+DEBUGGER = os.getenv("DEBUGGER", "")
+FUZZER_ARGS = os.getenv("FUZZER_ARGS", "")
+
+
+def run_fuzzer(fuzzer: str):
+    logging.info(f"Running fuzzer {fuzzer}...")
+
+    corpus_dir = f"{fuzzer}.in"
+    with Path(corpus_dir) as path:
+        if not path.exists() or not path.is_dir():
+            corpus_dir = ""
+
+    options_file = f"{fuzzer}.options"
+    custom_libfuzzer_options = ""
+
+    with Path(options_file) as path:
+        if path.exists() and path.is_file():
+            custom_asan_options = parse_options(options_file, "asan")
+            if custom_asan_options:
+                os.environ[
+                    "ASAN_OPTIONS"
+                ] = f"{os.environ['ASAN_OPTIONS']}:{custom_asan_options}"
+
+            custom_msan_options = parse_options(options_file, "msan")
+            if custom_msan_options:
+                os.environ[
+                    "MSAN_OPTIONS"
+                ] = f"{os.environ['MSAN_OPTIONS']}:{custom_msan_options}"
+
+            custom_ubsan_options = parse_options(options_file, "ubsan")
+            if custom_ubsan_options:
+                os.environ[
+                    "UBSAN_OPTIONS"
+                ] = f"{os.environ['UBSAN_OPTIONS']}:{custom_ubsan_options}"
+
+            custom_libfuzzer_options = parse_options(options_file, "libfuzzer")
+
+    cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}"
+    if custom_libfuzzer_options:
+        cmd_line += f" {custom_libfuzzer_options}"
+
+    if not "-dict=" in cmd_line and Path(f"{fuzzer}.dict").exists():
+        cmd_line += f" -dict={fuzzer}.dict"
+
+    cmd_line += " < /dev/null"
+
+    logging.info(f"...will execute: {cmd_line}")
+    subprocess.check_call(cmd_line, shell=True)
+
+
+def main():
+    logging.basicConfig(level=logging.INFO)
+
+    subprocess.check_call("ls -al", shell=True)
+
+    with Path() as current:
+        for fuzzer in current.iterdir():
+            if (current / fuzzer).is_file() and os.access(current / fuzzer, os.X_OK):
+                run_fuzzer(fuzzer)
+
+    exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index e7f907d02d4..41d08dade77 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -30,7 +30,7 @@ from commit_status_helper import (
     # post_commit_status_to_file,
     update_mergeable_check,
 )
-from docker_pull_helper import DockerImage  # , get_image_with_version
+from docker_pull_helper import DockerImage, get_image_with_version
 
 # from download_release_packages import download_last_release
 from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
@@ -329,10 +329,11 @@ def main():
     #             )
     #         sys.exit(0)
 
-    image_name = "clickhouse/libfuzzer-test"  # get_image_name(check_name)
-    docker_image = docker_build_image(
-        image_name, Path("../../docker/test/libfuzzer/")
-    )  # get_image_with_version(reports_path, image_name)
+    # image_name = "clickhouse/libfuzzer-test"  # get_image_name(check_name)
+    # docker_image = docker_build_image(
+    #     image_name, Path("../../docker/test/libfuzzer/")
+    # )
+    docker_image = get_image_with_version(reports_path, "clickhouse/libfuzzer")
 
     fuzzers_path = os.path.join(temp_path, "fuzzers")
     if not os.path.exists(fuzzers_path):

From 3a14bde95a54759cf8af6f1aac6d730dc2f3aad3 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Thu, 14 Sep 2023 20:06:53 +0000
Subject: [PATCH 070/243] cleanup, fix tee to escape non-decodable symbols

---
 docker/test/libfuzzer/run_libfuzzer.sh |  77 -------
 tests/ci/libfuzzer_test_check.py       | 270 ++-----------------------
 tests/ci/tee_popen.py                  |   1 +
 3 files changed, 17 insertions(+), 331 deletions(-)
 delete mode 100755 docker/test/libfuzzer/run_libfuzzer.sh

diff --git a/docker/test/libfuzzer/run_libfuzzer.sh b/docker/test/libfuzzer/run_libfuzzer.sh
deleted file mode 100755
index b60e942f02a..00000000000
--- a/docker/test/libfuzzer/run_libfuzzer.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash -eu
-
-# Fuzzer runner. Appends .options arguments and seed corpus to users args.
-# Usage: $0 <fuzzer_name> <fuzzer_args>
-
-# export PATH=$OUT:$PATH
-# cd $OUT
-
-DEBUGGER=${DEBUGGER:-}
-FUZZER_ARGS=${FUZZER_ARGS:-}
-
-function run_fuzzer() {
-    FUZZER=$1
-
-    echo Running fuzzer "$FUZZER"
-
-    CORPUS_DIR=""
-    if [ -d "${FUZZER}.in" ]; then
-        CORPUS_DIR="${FUZZER}.in"
-    fi
-
-    OPTIONS_FILE="${FUZZER}.options"
-    CUSTOM_LIBFUZZER_OPTIONS=""
-
-    if [ -f "$OPTIONS_FILE" ]; then
-        custom_asan_options=$(/parse_options.py "$OPTIONS_FILE" asan)
-        if [ -n "$custom_asan_options" ]; then
-            export ASAN_OPTIONS="$ASAN_OPTIONS:$custom_asan_options"
-        fi
-
-        custom_msan_options=$(/parse_options.py "$OPTIONS_FILE" msan)
-        if [ -n "$custom_msan_options" ]; then
-            export MSAN_OPTIONS="$MSAN_OPTIONS:$custom_msan_options"
-        fi
-
-        custom_ubsan_options=$(/parse_options.py "$OPTIONS_FILE" ubsan)
-        if [ -n "$custom_ubsan_options" ]; then
-            export UBSAN_OPTIONS="$UBSAN_OPTIONS:$custom_ubsan_options"
-        fi
-
-        CUSTOM_LIBFUZZER_OPTIONS=$(/parse_options.py "$OPTIONS_FILE" libfuzzer)
-    fi
-
-    CMD_LINE="./$FUZZER $FUZZER_ARGS"
-    CMD_LINE="$CMD_LINE $CORPUS_DIR"
-
-    if [[ -n "$CUSTOM_LIBFUZZER_OPTIONS" ]]; then
-        CMD_LINE="$CMD_LINE $CUSTOM_LIBFUZZER_OPTIONS"
-    fi
-
-    if [[ ! "$CMD_LINE" =~ "-dict=" ]]; then
-        if [ -f "$FUZZER.dict" ]; then
-            CMD_LINE="$CMD_LINE -dict=$FUZZER.dict"
-        fi
-    fi
-
-    CMD_LINE="$CMD_LINE < /dev/null"
-
-    echo "$CMD_LINE"
-
-    # Unset OUT so the fuzz target can't rely on it.
-    # unset OUT
-
-    if [ -n "$DEBUGGER" ]; then
-        CMD_LINE="$DEBUGGER $CMD_LINE"
-    fi
-
-    bash -c "$CMD_LINE"
-}
-
-ls -al
-
-for fuzzer in *_fuzzer; do
-    if [ -f "$fuzzer" ] && [ -x "$fuzzer" ]; then
-        run_fuzzer "$fuzzer"
-    fi
-done
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 41d08dade77..9fee997cc96 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -1,49 +1,37 @@
 #!/usr/bin/env python3
 
 import argparse
-import csv
 import logging
 import os
-import re
 import subprocess
 import sys
 import atexit
 import zipfile
 from pathlib import Path
-from typing import List, Tuple
+from typing import List
 
 from github import Github
 
-# from build_download_helper import download_all_deb_packages
 from build_download_helper import download_fuzzers
 from clickhouse_helper import (
     CiLogsCredentials,
-    # ClickHouseHelper,
-    # prepare_tests_results_for_clickhouse,
 )
 from commit_status_helper import (
-    # NotSet,
     RerunHelper,
     get_commit,
-    # override_status,
-    # post_commit_status,
-    # post_commit_status_to_file,
     update_mergeable_check,
 )
 from docker_pull_helper import DockerImage, get_image_with_version
 
-# from download_release_packages import download_last_release
 from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH
 from get_robot_token import get_best_robot_token
-from pr_info import PRInfo  # , FORCE_TESTS_LABEL
-from report import TestResults, read_test_results
+from pr_info import PRInfo
+from report import TestResults
 
-# from s3_helper import S3Helper
 from stopwatch import Stopwatch
 
 from tee_popen import TeePopen
 
-# from upload_result_helper import upload_results
 
 NO_CHANGES_MSG = "Nothing to run"
 
@@ -70,34 +58,18 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total):
     return result
 
 
-# def get_image_name(check_name):
-#     if "stateless" in check_name.lower():
-#         return "clickhouse/stateless-test"
-#     if "stateful" in check_name.lower():
-#         return "clickhouse/stateful-test"
-#     else:
-#         raise Exception(f"Cannot deduce image name based on check name {check_name}")
-
-
 def get_run_command(
-    #    check_name: str,
-    fuzzers_path: str,
-    repo_path: str,
-    result_path: str,
-    #    server_log_path: str,
+    fuzzers_path: Path,
+    repo_path: Path,
+    result_path: Path,
     kill_timeout: int,
     additional_envs: List[str],
     ci_logs_args: str,
     image: DockerImage,
-    #    flaky_check: bool,
-    #    tests_to_run: List[str],
 ) -> str:
     additional_options = ["--hung-check"]
     additional_options.append("--print-time")
 
-    # if tests_to_run:
-    #     additional_options += tests_to_run
-
     additional_options_str = (
         '-e ADDITIONAL_OPTIONS="' + " ".join(additional_options) + '"'
     )
@@ -108,18 +80,9 @@ def get_run_command(
         '-e S3_URL="https://s3.amazonaws.com/clickhouse-datasets"',
     ]
 
-    # if flaky_check:
-    #     envs.append("-e NUM_TRIES=100")
-    #     envs.append("-e MAX_RUN_TIME=1800")
-
     envs += [f"-e {e}" for e in additional_envs]
 
     env_str = " ".join(envs)
-    # volume_with_broken_test = (
-    #     f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt "
-    #     if "analyzer" in check_name
-    #     else ""
-    # )
 
     return (
         f"docker run "
@@ -127,96 +90,11 @@ def get_run_command(
         f"--workdir=/fuzzers "
         f"--volume={fuzzers_path}:/fuzzers "
         f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
-        #        f"{volume_with_broken_test}"
         f"--volume={result_path}:/test_output "
-        #        f"--volume={server_log_path}:/var/log/clickhouse-server "
         f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}"
     )
 
 
-def get_tests_to_run(pr_info: PRInfo) -> List[str]:
-    result = set()
-
-    if pr_info.changed_files is None:
-        return []
-
-    for fpath in pr_info.changed_files:
-        if re.match(r"tests/queries/0_stateless/[0-9]{5}", fpath):
-            logging.info("File '%s' is changed and seems like a test", fpath)
-            fname = fpath.split("/")[3]
-            fname_without_ext = os.path.splitext(fname)[0]
-            # add '.' to the end of the test name not to run all tests with the same prefix
-            # e.g. we changed '00001_some_name.reference'
-            # and we have ['00001_some_name.sh', '00001_some_name_2.sql']
-            # so we want to run only '00001_some_name.sh'
-            result.add(fname_without_ext + ".")
-        elif "tests/queries/" in fpath:
-            # log suspicious changes from tests/ for debugging in case of any problems
-            logging.info("File '%s' is changed, but it doesn't look like a test", fpath)
-    return list(result)
-
-
-def process_results(
-    result_folder: str,
-    server_log_path: str,
-) -> Tuple[str, str, TestResults, List[str]]:
-    test_results = []  # type: TestResults
-    additional_files = []
-    # Just upload all files from result_folder.
-    # If task provides processed results, then it's responsible for content of result_folder.
-    if os.path.exists(result_folder):
-        test_files = [
-            f
-            for f in os.listdir(result_folder)
-            if os.path.isfile(os.path.join(result_folder, f))
-        ]
-        additional_files = [os.path.join(result_folder, f) for f in test_files]
-
-    if os.path.exists(server_log_path):
-        server_log_files = [
-            f
-            for f in os.listdir(server_log_path)
-            if os.path.isfile(os.path.join(server_log_path, f))
-        ]
-        additional_files = additional_files + [
-            os.path.join(server_log_path, f) for f in server_log_files
-        ]
-
-    status = []
-    status_path = os.path.join(result_folder, "check_status.tsv")
-    if os.path.exists(status_path):
-        logging.info("Found test_results.tsv")
-        with open(status_path, "r", encoding="utf-8") as status_file:
-            status = list(csv.reader(status_file, delimiter="\t"))
-
-    if len(status) != 1 or len(status[0]) != 2:
-        logging.info("Files in result folder %s", os.listdir(result_folder))
-        return "error", "Invalid check_status.tsv", test_results, additional_files
-    state, description = status[0][0], status[0][1]
-
-    try:
-        results_path = Path(result_folder) / "test_results.tsv"
-
-        if results_path.exists():
-            logging.info("Found test_results.tsv")
-        else:
-            logging.info("Files in result folder %s", os.listdir(result_folder))
-            return "error", "Not found test_results.tsv", test_results, additional_files
-
-        test_results = read_test_results(results_path)
-        if len(test_results) == 0:
-            return "error", "Empty test_results.tsv", test_results, additional_files
-    except Exception as e:
-        return (
-            "error",
-            f"Cannot parse test_results.tsv ({e})",
-            test_results,
-            additional_files,
-        )
-
-    return state, description, test_results, additional_files
-
-
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("check_name")
@@ -255,16 +133,12 @@ def main():
     temp_path = TEMP_PATH
     repo_path = REPO_COPY
     reports_path = REPORTS_PATH
-    # post_commit_path = os.path.join(temp_path, "functional_commit_status.tsv")
 
     args = parse_args()
     check_name = args.check_name
     kill_timeout = args.kill_timeout
     validate_bugfix_check = args.validate_bugfix
 
-    # flaky_check = "flaky" in check_name.lower()
-
-    # run_changed_tests = flaky_check or validate_bugfix_check
     run_changed_tests = validate_bugfix_check
     gh = Github(get_best_robot_token(), per_page=100)
 
@@ -276,20 +150,9 @@ def main():
     commit = get_commit(gh, pr_info.sha)
     atexit.register(update_mergeable_check, gh, pr_info, check_name)
 
-    if not os.path.exists(temp_path):
+    if not Path(temp_path).exists():
         os.makedirs(temp_path)
 
-    # if validate_bugfix_check and "pr-bugfix" not in pr_info.labels:
-    #     if args.post_commit_status == "file":
-    #         post_commit_status_to_file(
-    #             post_commit_path,
-    #             f"Skipped (no pr-bugfix in {pr_info.labels})",
-    #             "success",
-    #             "null",
-    #         )
-    #     logging.info("Skipping '%s' (no pr-bugfix in %s)", check_name, pr_info.labels)
-    #     sys.exit(0)
-
     if "RUN_BY_HASH_NUM" in os.environ:
         run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
         run_by_hash_total = int(os.getenv("RUN_BY_HASH_TOTAL", "0"))
@@ -306,67 +169,30 @@ def main():
         logging.info("Check is already finished according to github status, exiting")
         sys.exit(0)
 
-    # tests_to_run = []
-    # if run_changed_tests:
-    #     tests_to_run = get_tests_to_run(pr_info)
-    #     if not tests_to_run:
-    #         state = override_status("success", check_name, validate_bugfix_check)
-    #         if args.post_commit_status == "commit_status":
-    #             post_commit_status(
-    #                 commit,
-    #                 state,
-    #                 NotSet,
-    #                 NO_CHANGES_MSG,
-    #                 check_name_with_group,
-    #                 pr_info,
-    #             )
-    #         elif args.post_commit_status == "file":
-    #             post_commit_status_to_file(
-    #                 post_commit_path,
-    #                 description=NO_CHANGES_MSG,
-    #                 state=state,
-    #                 report_url="null",
-    #             )
-    #         sys.exit(0)
-
-    # image_name = "clickhouse/libfuzzer-test"  # get_image_name(check_name)
-    # docker_image = docker_build_image(
-    #     image_name, Path("../../docker/test/libfuzzer/")
-    # )
     docker_image = get_image_with_version(reports_path, "clickhouse/libfuzzer")
 
-    fuzzers_path = os.path.join(temp_path, "fuzzers")
-    if not os.path.exists(fuzzers_path):
+    fuzzers_path = Path(temp_path) / "fuzzers"
+    if not fuzzers_path.exists():
         os.makedirs(fuzzers_path)
 
     download_fuzzers(check_name, reports_path, fuzzers_path)
 
     for file in os.listdir(fuzzers_path):
         if file.endswith("_fuzzer"):
-            os.chmod(os.path.join(fuzzers_path, file), 0o777)
+            os.chmod(fuzzers_path / file, 0o777)
         elif file.endswith("_seed_corpus.zip"):
-            corpus_path = os.path.join(
-                fuzzers_path, file.removesuffix("_seed_corpus.zip") + ".in"
-            )
-            zipfile.ZipFile(os.path.join(fuzzers_path, file), "r").extractall(
-                corpus_path
-            )
+            corpus_path = fuzzers_path / (file.removesuffix("_seed_corpus.zip") + ".in")
+            zipfile.ZipFile(fuzzers_path / file, "r").extractall(corpus_path)
 
-    # server_log_path = os.path.join(temp_path, "server_log")
-    # if not os.path.exists(server_log_path):
-    #     os.makedirs(server_log_path)
-
-    result_path = os.path.join(temp_path, "result_path")
-    if not os.path.exists(result_path):
+    result_path = Path(temp_path) / "result_path"
+    if not result_path.exists():
         os.makedirs(result_path)
 
-    run_log_path = os.path.join(result_path, "run.log")
+    run_log_path = result_path / "run.log"
 
     additional_envs = get_additional_envs(
         check_name, run_by_hash_num, run_by_hash_total
     )
-    # if validate_bugfix_check:
-    #     additional_envs.append("GLOBAL_TAGS=no-random-settings")
 
     ci_logs_credentials = CiLogsCredentials(Path(temp_path) / "export-logs-config.sh")
     ci_logs_args = ci_logs_credentials.get_docker_arguments(
@@ -374,22 +200,16 @@ def main():
     )
 
     run_command = get_run_command(
-        #        check_name,
         fuzzers_path,
-        repo_path,
+        Path(repo_path),
         result_path,
-        #        server_log_path,
         kill_timeout,
         additional_envs,
         ci_logs_args,
         docker_image,
-        #        flaky_check,
-        #        tests_to_run,
     )
     logging.info("Going to run libFuzzer tests: %s", run_command)
 
-    # sys.exit(0)
-
     with TeePopen(run_command, run_log_path) as process:
         retcode = process.wait()
         if retcode == 0:
@@ -399,64 +219,6 @@ def main():
 
     sys.exit(0)
 
-    # try:
-    #     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
-    # except subprocess.CalledProcessError:
-    #     logging.warning("Failed to change files owner in %s, ignoring it", temp_path)
-
-    # ci_logs_credentials.clean_ci_logs_from_credentials(Path(run_log_path))
-    # s3_helper = S3Helper()
-
-    # state, description, test_results, additional_logs = process_results(
-    #     result_path, server_log_path
-    # )
-    # state = override_status(state, check_name, invert=validate_bugfix_check)
-
-    # ch_helper = ClickHouseHelper()
-
-    # report_url = upload_results(
-    #     s3_helper,
-    #     pr_info.number,
-    #     pr_info.sha,
-    #     test_results,
-    #     [run_log_path] + additional_logs,
-    #     check_name_with_group,
-    # )
-
-    # print(f"::notice:: {check_name} Report url: {report_url}")
-    # if args.post_commit_status == "commit_status":
-    #     post_commit_status(
-    #         commit, state, report_url, description, check_name_with_group, pr_info
-    #     )
-    # elif args.post_commit_status == "file":
-    #     post_commit_status_to_file(
-    #         post_commit_path,
-    #         description,
-    #         state,
-    #         report_url,
-    #     )
-    # else:
-    #     raise Exception(
-    #         f'Unknown post_commit_status option "{args.post_commit_status}"'
-    #     )
-
-    # prepared_events = prepare_tests_results_for_clickhouse(
-    #     pr_info,
-    #     test_results,
-    #     state,
-    #     stopwatch.duration_seconds,
-    #     stopwatch.start_time_str,
-    #     report_url,
-    #     check_name_with_group,
-    # )
-    # ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
-
-    # if state != "success":
-    #     if FORCE_TESTS_LABEL in pr_info.labels:
-    #         print(f"'{FORCE_TESTS_LABEL}' enabled, will report success")
-    #     else:
-    #         sys.exit(1)
-
 
 if __name__ == "__main__":
     main()
diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py
index 7872b489951..a50532aea20 100644
--- a/tests/ci/tee_popen.py
+++ b/tests/ci/tee_popen.py
@@ -55,6 +55,7 @@ class TeePopen:
             stderr=STDOUT,
             stdout=PIPE,
             bufsize=1,
+            errors="backslashreplace",
         )
         if self.timeout is not None and self.timeout > 0:
             t = Thread(target=self._check_timeout)

From d1cd3cdd2a4dbbd7d695c09bdb09b7b6d1830400 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Sat, 16 Sep 2023 01:04:19 +0000
Subject: [PATCH 071/243] move on its own workflow

---
 .github/workflows/libfuzzer.yml    | 80 +++++++++++++++++++++++++++++
 .github/workflows/pull_request.yml | 82 ------------------------------
 2 files changed, 80 insertions(+), 82 deletions(-)
 create mode 100644 .github/workflows/libfuzzer.yml

diff --git a/.github/workflows/libfuzzer.yml b/.github/workflows/libfuzzer.yml
new file mode 100644
index 00000000000..74772ccf6d9
--- /dev/null
+++ b/.github/workflows/libfuzzer.yml
@@ -0,0 +1,80 @@
+name: libFuzzer
+
+env:
+  # Force the stdout and stderr streams to be unbuffered
+  PYTHONUNBUFFERED: 1
+
+on: # yamllint disable-line rule:truthy
+#  schedule:
+#    - cron: '0 0 2 31 1' # never for now
+  workflow_dispatch:
+jobs:
+  BuilderFuzzers:
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          BUILD_NAME=fuzzers
+          EOF
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+          submodules: true
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v3
+        with:
+          name: ${{ env.BUILD_URLS }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  libFuzzerTest:
+    needs: [BuilderFuzzers]
+    runs-on: [self-hosted, func-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/libfuzzer
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=libFuzzer tests
+          REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse
+          KILL_TIMEOUT=10800
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v3
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Check out repository code
+        uses: ClickHouse/checkout@v1
+        with:
+          clear-repository: true
+      - name: libFuzzer test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 libfuzzer_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
+          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
+          sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 7e56254bac0..ce135846dd5 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5059,88 +5059,6 @@ jobs:
           docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
           sudo rm -fr "$TEMP_PATH"
 #############################################################################################
-#################################### libFuzzer build ########################################
-#############################################################################################
-  BuilderFuzzers:
-    if: contains(github.event.pull_request.labels.*.name, 'libFuzzer')
-    needs: [DockerHubPush, FastTest, StyleCheck]
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/build_check
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          BUILD_NAME=fuzzers
-          EOF
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload build URLs to artifacts
-        if: ${{ success() || failure() }}
-        uses: actions/upload-artifact@v3
-        with:
-          name: ${{ env.BUILD_URLS }}
-          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
-##############################################################################################
-################################ libFuzzer TESTS #############################################
-##############################################################################################
-  libFuzzerTest:
-    needs: [BuilderFuzzers]
-    runs-on: [self-hosted, func-tester]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          TEMP_PATH=${{runner.temp}}/libfuzzer
-          REPORTS_PATH=${{runner.temp}}/reports_dir
-          CHECK_NAME=libFuzzer tests
-          REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse
-          KILL_TIMEOUT=10800
-          EOF
-      - name: Download json reports
-        uses: actions/download-artifact@v3
-        with:
-          path: ${{ env.REPORTS_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-      - name: libFuzzer test
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci"
-          python3 libfuzzer_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH"
-#############################################################################################
 ###################################### JEPSEN TESTS #########################################
 #############################################################################################
   Jepsen:

From dd6f12dd94a93c916304ff9c0c0bd2dd2a40fcb9 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Sat, 16 Sep 2023 04:41:13 +0000
Subject: [PATCH 072/243] review suggestions

---
 .github/workflows/libfuzzer.yml        | 21 +++++++--
 .github/workflows/pull_request.yml     | 13 ++++--
 docker/test/libfuzzer/parse_options.py | 61 --------------------------
 docker/test/libfuzzer/run_libfuzzer.py | 26 ++++++-----
 tests/ci/libfuzzer_test_check.py       | 42 +++---------------
 5 files changed, 48 insertions(+), 115 deletions(-)
 delete mode 100755 docker/test/libfuzzer/parse_options.py

diff --git a/.github/workflows/libfuzzer.yml b/.github/workflows/libfuzzer.yml
index 74772ccf6d9..e8a0396684a 100644
--- a/.github/workflows/libfuzzer.yml
+++ b/.github/workflows/libfuzzer.yml
@@ -5,9 +5,9 @@ env:
   PYTHONUNBUFFERED: 1
 
 on: # yamllint disable-line rule:truthy
-#  schedule:
-#    - cron: '0 0 2 31 1' # never for now
-  workflow_dispatch:
+  #  schedule:
+  #    - cron: '0 0 2 31 1' # never for now
+  workflow_call:
 jobs:
   BuilderFuzzers:
     runs-on: [self-hosted, builder]
@@ -21,11 +21,19 @@ jobs:
           CACHES_PATH=${{runner.temp}}/../ccaches
           BUILD_NAME=fuzzers
           EOF
+      - name: Download changed images
+        # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
+        continue-on-error: true
+        uses: actions/download-artifact@v3
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
         with:
           clear-repository: true
           submodules: true
+          ref: ${{github.ref}}
       - name: Build
         run: |
           sudo rm -fr "$TEMP_PATH"
@@ -57,6 +65,13 @@ jobs:
           REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse
           KILL_TIMEOUT=10800
           EOF
+      - name: Download changed images
+        # even if artifact does not exist, e.g. on `do not test` label or failed Docker job
+        continue-on-error: true
+        uses: actions/download-artifact@v3
+        with:
+          name: changed_images
+          path: ${{ env.TEMP_PATH }}
       - name: Download json reports
         uses: actions/download-artifact@v3
         with:
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index ce135846dd5..838a6b56440 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -5186,9 +5186,16 @@ jobs:
           cd "$GITHUB_WORKSPACE/tests/ci"
           python3 finish_check.py
           python3 merge_pr.py --check-approved
-##############################################################################################
-########################### SQLLOGIC TEST ###################################################
-##############################################################################################
+#############################################################################################
+####################################### libFuzzer ###########################################
+#############################################################################################
+  libFuzzer:
+    if: contains(github.event.pull_request.labels.*.name, 'libFuzzer')
+    needs: [DockerHubPush, StyleCheck]
+    uses: ./.github/workflows/libfuzzer.yml
+  ##############################################################################################
+  ############################ SQLLOGIC TEST ###################################################
+  ##############################################################################################
   SQLLogicTestRelease:
     needs: [BuilderDebRelease]
     runs-on: [self-hosted, func-tester]
diff --git a/docker/test/libfuzzer/parse_options.py b/docker/test/libfuzzer/parse_options.py
deleted file mode 100755
index 5695e80a714..00000000000
--- a/docker/test/libfuzzer/parse_options.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python3
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-################################################################################
-"""Helper script for parsing custom fuzzing options."""
-import configparser
-import sys
-
-
-def parse_options(options_file_path, options_section):
-    """Parses the given file and returns options from the given section."""
-    parser = configparser.ConfigParser()
-    parser.read(options_file_path)
-
-    if not parser.has_section(options_section):
-        return None
-
-    options = parser[options_section]
-
-    if options_section == "libfuzzer":
-        options_string = " ".join(
-            "-%s=%s" % (key, value) for key, value in options.items()
-        )
-    else:
-        # Sanitizer options.
-        options_string = ":".join(
-            "%s=%s" % (key, value) for key, value in options.items()
-        )
-
-    return options_string
-
-
-def main():
-    """Processes the arguments and prints the options in the correct format."""
-    if len(sys.argv) < 3:
-        sys.stderr.write(
-            "Usage: %s <path_to_options_file> <options_section>\n" % sys.argv[0]
-        )
-        return 1
-
-    options = parse_options(sys.argv[1], sys.argv[2])
-    if options is not None:
-        print(options)
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py
index b608c97de60..5ed019490d5 100755
--- a/docker/test/libfuzzer/run_libfuzzer.py
+++ b/docker/test/libfuzzer/run_libfuzzer.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
 
+import configparser
 import logging
 import os
 from pathlib import Path
 import subprocess
-from parse_options import parse_options
 
 DEBUGGER = os.getenv("DEBUGGER", "")
 FUZZER_ARGS = os.getenv("FUZZER_ARGS", "")
@@ -23,25 +23,29 @@ def run_fuzzer(fuzzer: str):
 
     with Path(options_file) as path:
         if path.exists() and path.is_file():
-            custom_asan_options = parse_options(options_file, "asan")
-            if custom_asan_options:
+            parser = configparser.ConfigParser()
+            parser.read(path)
+
+            if parser.has_section("asan"):
                 os.environ[
                     "ASAN_OPTIONS"
-                ] = f"{os.environ['ASAN_OPTIONS']}:{custom_asan_options}"
+                ] = f"{os.environ['ASAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}"
 
-            custom_msan_options = parse_options(options_file, "msan")
-            if custom_msan_options:
+            if parser.has_section("msan"):
                 os.environ[
                     "MSAN_OPTIONS"
-                ] = f"{os.environ['MSAN_OPTIONS']}:{custom_msan_options}"
+                ] = f"{os.environ['MSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}"
 
-            custom_ubsan_options = parse_options(options_file, "ubsan")
-            if custom_ubsan_options:
+            if parser.has_section("ubsan"):
                 os.environ[
                     "UBSAN_OPTIONS"
-                ] = f"{os.environ['UBSAN_OPTIONS']}:{custom_ubsan_options}"
+                ] = f"{os.environ['UBSAN_OPTIONS']}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}"
 
-            custom_libfuzzer_options = parse_options(options_file, "libfuzzer")
+            if parser.has_section("libfuzzer"):
+                custom_libfuzzer_options = " ".join(
+                    "-%s=%s" % (key, value)
+                    for key, value in parser["libfuzzer"].items()
+                )
 
     cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}"
     if custom_libfuzzer_options:
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 9fee997cc96..8d307b22042 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -3,7 +3,6 @@
 import argparse
 import logging
 import os
-import subprocess
 import sys
 import atexit
 import zipfile
@@ -99,59 +98,28 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("check_name")
     parser.add_argument("kill_timeout", type=int)
-    parser.add_argument(
-        "--validate-bugfix",
-        action="store_true",
-        help="Check that added tests failed on latest stable",
-    )
-    parser.add_argument(
-        "--post-commit-status",
-        default="commit_status",
-        choices=["commit_status", "file"],
-        help="Where to public post commit status",
-    )
     return parser.parse_args()
 
 
-def docker_build_image(image_name: str, filepath: Path) -> DockerImage:
-    # context = filepath.parent
-    docker_image = DockerImage(image_name)
-    build_cmd = f"docker build --network=host -t {image_name} {filepath}"
-    logging.info("Will build image with cmd: '%s'", build_cmd)
-    subprocess.check_call(
-        build_cmd,
-        shell=True,
-    )
-    return docker_image
-
-
 def main():
     logging.basicConfig(level=logging.INFO)
 
     stopwatch = Stopwatch()
 
-    temp_path = TEMP_PATH
-    repo_path = REPO_COPY
+    temp_path = Path(TEMP_PATH)
+    repo_path = Path(REPO_COPY)
     reports_path = REPORTS_PATH
 
     args = parse_args()
     check_name = args.check_name
     kill_timeout = args.kill_timeout
-    validate_bugfix_check = args.validate_bugfix
 
-    run_changed_tests = validate_bugfix_check
     gh = Github(get_best_robot_token(), per_page=100)
-
-    # For validate_bugfix_check we need up to date information about labels, so pr_event_from_api is used
-    pr_info = PRInfo(
-        need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check
-    )
-
+    pr_info = PRInfo()
     commit = get_commit(gh, pr_info.sha)
     atexit.register(update_mergeable_check, gh, pr_info, check_name)
 
-    if not Path(temp_path).exists():
-        os.makedirs(temp_path)
+    temp_path.mkdir(parents=True, exist_ok=True)
 
     if "RUN_BY_HASH_NUM" in os.environ:
         run_by_hash_num = int(os.getenv("RUN_BY_HASH_NUM", "0"))
@@ -201,7 +169,7 @@ def main():
 
     run_command = get_run_command(
         fuzzers_path,
-        Path(repo_path),
+        repo_path,
         result_path,
         kill_timeout,
         additional_envs,

From a138d8a376a8656d35b3128673f982afb6ed966a Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Sun, 17 Sep 2023 21:03:14 +0000
Subject: [PATCH 073/243] Use index to access shard addresses

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 4395f8373b2..9fb3f4f1b67 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -180,8 +180,10 @@ void executeQuery(
 
     ClusterPtr cluster = query_info.getCluster();
     const size_t shards = cluster->getShardCount();
-    for (const auto & shard_info : cluster->getShardsInfo())
+    for (size_t i = 0, s = cluster->getShardsInfo().size(); i < s; ++i)
     {
+        const auto & shard_info = cluster->getShardsInfo()[i];
+
         ASTPtr query_ast_for_shard = query_ast->clone();
         if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
         {
@@ -213,18 +215,8 @@ void executeQuery(
 
         // decide for each shard if parallel reading from replicas should be enabled
         // according to settings and number of replicas declared per shard
-        bool parallel_replicas_enabled = false;
-        if (shard_info.shard_num > 0 && shard_info.shard_num <= cluster->getShardsAddresses().size())
-        {
-            const auto & addresses = cluster->getShardsAddresses().at(shard_info.shard_num - 1);
-            parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas();
-        }
-        else
-        {
-            chassert(shard_info.shard_num > 0);
-
-            // FIXME or code: when can it happened (shard_num bigger than shard's addresses)? looks inconsistent
-        }
+        const auto & addresses = cluster->getShardsAddresses().at(i);
+        bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseParallelReplicas();
 
         stream_factory.createForShard(shard_info,
             query_ast_for_shard, main_table, table_func_ptr,

From 39ca44b51a1432f3c6840670a09ecd7611b6393f Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Fri, 15 Sep 2023 16:33:48 +0800
Subject: [PATCH 074/243] Respect max_block_size for array join to avoid
 possible OOM

---
 src/Interpreters/ArrayJoinAction.cpp          | 73 ++++++++++++++---
 src/Interpreters/ArrayJoinAction.h            | 28 ++++++-
 .../Transforms/ArrayJoinTransform.cpp         | 35 ++++++--
 .../Transforms/ArrayJoinTransform.h           | 11 ++-
 ...5_array_join_with_max_block_size.reference | 81 +++++++++++++++++++
 .../02865_array_join_with_max_block_size.sql  | 14 ++++
 6 files changed, 222 insertions(+), 20 deletions(-)
 create mode 100644 tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
 create mode 100644 tests/queries/0_stateless/02865_array_join_with_max_block_size.sql

diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp
index 4f42122e98f..39e35314afc 100644
--- a/src/Interpreters/ArrayJoinAction.cpp
+++ b/src/Interpreters/ArrayJoinAction.cpp
@@ -62,6 +62,7 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr
     : columns(array_joined_columns_)
     , is_left(array_join_is_left)
     , is_unaligned(context->getSettingsRef().enable_unaligned_array_join)
+    , max_block_size(context->getSettingsRef().max_block_size)
 {
     if (columns.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "No arrays to join");
@@ -93,19 +94,31 @@ void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const
     }
 }
 
-void ArrayJoinAction::execute(Block & block)
+ArrayJoinResultIteratorPtr ArrayJoinAction::execute(Block block)
 {
     if (columns.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "No arrays to join");
 
-    ColumnPtr any_array_map_ptr = block.getByName(*columns.begin()).column->convertToFullColumnIfConst();
-    const auto * any_array = getArrayJoinColumnRawPtr(any_array_map_ptr);
+    return std::make_unique<ArrayJoinResultIterator>(this, std::move(block));
+}
+
+
+ArrayJoinResultIterator::ArrayJoinResultIterator(const ArrayJoinAction * array_join_, Block block_)
+    : array_join(array_join_), block(std::move(block_)), total_rows(block.rows()), current_row(0)
+{
+    const auto & columns = array_join->columns;
+    bool is_unaligned = array_join->is_unaligned;
+    bool is_left = array_join->is_left;
+    const auto & function_length = array_join->function_length;
+    const auto & function_greatest = array_join->function_greatest;
+    const auto & function_array_resize = array_join->function_array_resize;
+    const auto & function_builder = array_join->function_builder;
+
+    any_array_map_ptr = block.getByName(*columns.begin()).column->convertToFullColumnIfConst();
+    any_array = getArrayJoinColumnRawPtr(any_array_map_ptr);
     if (!any_array)
         throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument");
 
-    /// If LEFT ARRAY JOIN, then we create columns in which empty arrays are replaced by arrays with one element - the default value.
-    std::map<String, ColumnPtr> non_empty_array_columns;
-
     if (is_unaligned)
     {
         /// Resize all array joined columns to the longest one, (at least 1 if LEFT ARRAY JOIN), padded with default values.
@@ -160,12 +173,43 @@ void ArrayJoinAction::execute(Block & block)
         if (!any_array)
             throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument");
     }
+}
+
+bool ArrayJoinResultIterator::hasNext() const
+{
+    return total_rows != 0 && current_row < total_rows;
+}
 
 
+Block ArrayJoinResultIterator::next()
+{
+    if (!hasNext())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No more elements in ArrayJoinResultIterator.");
+
+    size_t max_block_size = array_join->max_block_size;
+    const auto & offsets = any_array->getOffsets();
+
+    /// Make sure output block rows do not exceed max_block_size.
+    size_t next_row = current_row;
+    for (; next_row < total_rows; ++next_row)
+    {
+        if (offsets[next_row] - offsets[current_row - 1] >= max_block_size)
+            break;
+    }
+    if (next_row == current_row)
+        ++next_row;
+
+    Block res;
     size_t num_columns = block.columns();
+    const auto & columns = array_join->columns;
+    bool is_unaligned = array_join->is_unaligned;
+    bool is_left = array_join->is_left;
+    auto cut_any_col = any_array->cut(current_row, next_row - current_row);
+    const auto * cut_any_array = typeid_cast<const ColumnArray *>(cut_any_col.get());
     for (size_t i = 0; i < num_columns; ++i)
     {
-        ColumnWithTypeAndName & current = block.safeGetByPosition(i);
+        ColumnWithTypeAndName current = block.safeGetByPosition(i);
+        current.column = current.column->cut(current_row, next_row - current_row);
 
         if (columns.contains(current.name))
         {
@@ -174,18 +218,20 @@ void ArrayJoinAction::execute(Block & block)
                 ColumnPtr array_ptr;
                 if (typeid_cast<const DataTypeArray *>(current.type.get()))
                 {
-                    array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name] : current.column;
+                    array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name]->cut(current_row, next_row - current_row)
+                                                           : current.column;
                     array_ptr = array_ptr->convertToFullColumnIfConst();
                 }
                 else
                 {
                     ColumnPtr map_ptr = current.column->convertToFullColumnIfConst();
                     const ColumnMap & map = typeid_cast<const ColumnMap &>(*map_ptr);
-                    array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name] : map.getNestedColumnPtr();
+                    array_ptr = (is_left && !is_unaligned) ? non_empty_array_columns[current.name]->cut(current_row, next_row - current_row)
+                                                           : map.getNestedColumnPtr();
                 }
 
                 const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
-                if (!is_unaligned && !array.hasEqualOffsets(*any_array))
+                if (!is_unaligned && !array.hasEqualOffsets(*cut_any_array))
                     throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
 
                 current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
@@ -196,9 +242,14 @@ void ArrayJoinAction::execute(Block & block)
         }
         else
         {
-            current.column = current.column->replicate(any_array->getOffsets());
+            current.column = current.column->replicate(cut_any_array->getOffsets());
         }
+
+        res.insert(std::move(current));
     }
+
+    current_row = next_row;
+    return res;
 }
 
 }
diff --git a/src/Interpreters/ArrayJoinAction.h b/src/Interpreters/ArrayJoinAction.h
index 3baabd797d7..603f22ef245 100644
--- a/src/Interpreters/ArrayJoinAction.h
+++ b/src/Interpreters/ArrayJoinAction.h
@@ -20,12 +20,16 @@ const ColumnArray * getArrayJoinColumnRawPtr(const ColumnPtr & column);
 /// Otherwise do nothing.
 ColumnWithTypeAndName convertArrayJoinColumn(const ColumnWithTypeAndName & src_col);
 
+
+class ArrayJoinResultIterator;
+using ArrayJoinResultIteratorPtr = std::unique_ptr<ArrayJoinResultIterator>;
 class ArrayJoinAction
 {
 public:
     NameSet columns;
     bool is_left = false;
     bool is_unaligned = false;
+    size_t max_block_size = DEFAULT_BLOCK_SIZE;
 
     /// For unaligned [LEFT] ARRAY JOIN
     FunctionOverloadResolverPtr function_length;
@@ -37,9 +41,31 @@ public:
 
     ArrayJoinAction(const NameSet & array_joined_columns_, bool array_join_is_left, ContextPtr context);
     void prepare(ColumnsWithTypeAndName & sample) const;
-    void execute(Block & block);
+
+    ArrayJoinResultIteratorPtr execute(Block block);
 };
 
 using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
 
+class ArrayJoinResultIterator
+{
+public:
+    explicit ArrayJoinResultIterator(const ArrayJoinAction * array_join_, Block block_);
+    ~ArrayJoinResultIterator() = default;
+
+    Block next();
+    bool hasNext() const;
+
+private:
+    const ArrayJoinAction * array_join;
+    Block block;
+
+    ColumnPtr any_array_map_ptr;
+    const ColumnArray * any_array;
+    /// If LEFT ARRAY JOIN, then we create columns in which empty arrays are replaced by arrays with one element - the default value.
+    std::map<String, ColumnPtr> non_empty_array_columns;
+
+    size_t total_rows;
+    size_t current_row;
+};
 }
diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp
index d9c940b8b05..1304434d74e 100644
--- a/src/Processors/Transforms/ArrayJoinTransform.cpp
+++ b/src/Processors/Transforms/ArrayJoinTransform.cpp
@@ -1,20 +1,29 @@
 #include <Processors/Transforms/ArrayJoinTransform.h>
 #include <Interpreters/ArrayJoinAction.h>
+#include "Core/Field.h"
 
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
 Block ArrayJoinTransform::transformHeader(Block header, const ArrayJoinActionPtr & array_join)
 {
-    array_join->execute(header);
-    return header;
+    auto columns = header.getColumnsWithTypeAndName();
+    array_join->prepare(columns);
+    Block res{std::move(columns)};
+    res.setColumns(res.mutateColumns());
+    return res;
 }
 
 ArrayJoinTransform::ArrayJoinTransform(
     const Block & header_,
     ArrayJoinActionPtr array_join_,
     bool /*on_totals_*/)
-    : ISimpleTransform(header_, transformHeader(header_, array_join_), false)
+    : IInflatingTransform(header_, transformHeader(header_, array_join_))
     , array_join(std::move(array_join_))
 {
     /// TODO
@@ -22,11 +31,25 @@ ArrayJoinTransform::ArrayJoinTransform(
 //        throw Exception(ErrorCodes::LOGICAL_ERROR, "ARRAY JOIN is not supported for totals");
 }
 
-void ArrayJoinTransform::transform(Chunk & chunk)
+void ArrayJoinTransform::consume(Chunk chunk)
 {
     auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns());
-    array_join->execute(block);
-    chunk.setColumns(block.getColumns(), block.rows());
+    result_iterator = array_join->execute(block);
+}
+
+
+bool ArrayJoinTransform::canGenerate()
+{
+    return result_iterator && result_iterator->hasNext();
+}
+
+Chunk ArrayJoinTransform::generate()
+{
+    if (!canGenerate())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in ArrayJoinTransform");
+
+    auto block = result_iterator->next();
+    return Chunk(block.getColumns(), block.rows());
 }
 
 }
diff --git a/src/Processors/Transforms/ArrayJoinTransform.h b/src/Processors/Transforms/ArrayJoinTransform.h
index 0d81d5e458c..4219135982d 100644
--- a/src/Processors/Transforms/ArrayJoinTransform.h
+++ b/src/Processors/Transforms/ArrayJoinTransform.h
@@ -1,5 +1,6 @@
 #pragma once
 #include <Processors/ISimpleTransform.h>
+#include <Processors/IInflatingTransform.h>
 
 namespace DB
 {
@@ -7,8 +8,11 @@ namespace DB
 class ArrayJoinAction;
 using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
 
+class ArrayJoinResultIterator;
+using ArrayJoinResultIteratorPtr = std::unique_ptr<ArrayJoinResultIterator>;
+
 /// Execute ARRAY JOIN
-class ArrayJoinTransform : public ISimpleTransform
+class ArrayJoinTransform : public IInflatingTransform
 {
 public:
     ArrayJoinTransform(
@@ -21,10 +25,13 @@ public:
     static Block transformHeader(Block header, const ArrayJoinActionPtr & array_join);
 
 protected:
-    void transform(Chunk & chunk) override;
+    void consume(Chunk chunk) override;
+    bool canGenerate() override;
+    Chunk generate() override;
 
 private:
     ArrayJoinActionPtr array_join;
+    ArrayJoinResultIteratorPtr result_iterator;
 };
 
 }
diff --git a/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference b/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
new file mode 100644
index 00000000000..9ea48287e78
--- /dev/null
+++ b/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
@@ -0,0 +1,81 @@
+-- { echoOn }
+set max_block_size = 10, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+0	100000
+1	100000
+2	100000
+3	100000
+4	100000
+5	100000
+6	100000
+7	100000
+8	100000
+9	100000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	5004001
+1	5003000
+2	5002000
+3	5001000
+4	5000000
+5	4999000
+6	4998000
+7	4997000
+8	4996000
+9	4995000
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(10000)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+0	10006000
+1	1000
+2	10004000
+3	1000
+4	10002000
+5	1000
+6	10000000
+7	1000
+8	9998000
+9	1000
+set max_block_size = 1000, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+0	100000
+1	100000
+2	100000
+3	100000
+4	100000
+5	100000
+6	100000
+7	100000
+8	100000
+9	100000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	5004001
+1	5003000
+2	5002000
+3	5001000
+4	5000000
+5	4999000
+6	4998000
+7	4997000
+8	4996000
+9	4995000
+set max_block_size = 100000, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+0	100000
+1	100000
+2	100000
+3	100000
+4	100000
+5	100000
+6	100000
+7	100000
+8	100000
+9	100000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	5004001
+1	5003000
+2	5002000
+3	5001000
+4	5000000
+5	4999000
+6	4998000
+7	4997000
+8	4996000
+9	4995000
diff --git a/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql b/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql
new file mode 100644
index 00000000000..fe393d6ccd6
--- /dev/null
+++ b/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql
@@ -0,0 +1,14 @@
+-- { echoOn }
+set max_block_size = 10, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(10000)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+
+set max_block_size = 1000, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+
+set max_block_size = 100000, enable_unaligned_array_join = true;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+-- { echoOff }

From 68cdfbcc6c95ae601c9788eb75104e8fee896fbf Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Mon, 11 Sep 2023 16:03:03 +0000
Subject: [PATCH 075/243] Refactorings and cleanups (semantics did not change)

---
 src/Functions/FunctionHelpers.cpp             |   2 +-
 src/Functions/FunctionsStringArray.cpp        |  18 +-
 src/Functions/FunctionsStringArray.h          | 246 ++++++------------
 src/Functions/URL/URLHierarchy.cpp            |  25 +-
 src/Functions/URL/URLPathHierarchy.cpp        |  25 +-
 .../URL/extractURLParameterNames.cpp          |  24 +-
 src/Functions/URL/extractURLParameters.cpp    |  23 +-
 7 files changed, 121 insertions(+), 242 deletions(-)

diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp
index 7a9817ad344..6c3e438dea7 100644
--- a/src/Functions/FunctionHelpers.cpp
+++ b/src/Functions/FunctionHelpers.cpp
@@ -105,7 +105,7 @@ void validateArgumentType(const IFunction & func, const DataTypes & arguments,
 
     const auto & argument = arguments[argument_index];
     if (!validator_func(*argument))
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {} expected {}",
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}",
                         argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description);
 }
 
diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp
index e7519068f44..4d118481bb2 100644
--- a/src/Functions/FunctionsStringArray.cpp
+++ b/src/Functions/FunctionsStringArray.cpp
@@ -9,19 +9,17 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
-DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & arguments) const
+DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
 {
-    if (arguments.size() != 1 && arguments.size() != 2)
-        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-            "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
-            getName(), arguments.size());
+    FunctionArgumentDescriptors mandatory_args{
+        {"arr", &isArray<IDataType>, nullptr, "Array"},
+    };
 
-    const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
-    if (!array_type)
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array.", getName());
+    FunctionArgumentDescriptors optional_args{
+        {"separator", &isString<IDataType>, isColumnConst, "const String"},
+    };
 
-    if (arguments.size() == 2 && !isString(arguments[1]))
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be constant string.", getName());
+    validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
 
     return std::make_shared<DataTypeString>();
 }
diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index 8d41789b556..ce78090dc6b 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -66,7 +66,6 @@ private:
     Pos end;
 
 public:
-    /// Get the name of the function.
     static constexpr auto name = "alphaTokens";
     static String getName() { return name; }
 
@@ -74,18 +73,22 @@ public:
 
     static size_t getNumberOfArguments() { return 0; }
 
-    /// Check the type of the function's arguments.
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (arguments.empty() || arguments.size() > 2)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
+        FunctionArgumentDescriptors mandatory_args{
+            {"s", &isString<IDataType>, nullptr, "String"},
+        };
 
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-                "Must be String.", arguments[0]->getName(), getName());
+        FunctionArgumentDescriptors optional_args{
+            {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
     }
 
-    /// Initialize by the function arguments.
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(1);
+
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
     /// Called for each next string.
@@ -95,18 +98,6 @@ public:
         end = end_;
     }
 
-    /// Returns the position of the argument, that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 1;
-    }
-
     /// Get the next token, if any, or return false.
     bool get(Pos & token_begin, Pos & token_end)
     {
@@ -142,18 +133,14 @@ public:
     static bool isVariadic() { return true; }
     static size_t getNumberOfArguments() { return 0; }
 
-    /// Check the type of the function's arguments.
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (arguments.empty() || arguments.size() > 2)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
-
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-                "Must be String.", arguments[0]->getName(), getName());
+        SplitByAlphaImpl::checkArguments(func, arguments);
     }
 
-    /// Initialize by the function arguments.
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(1);
+
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
     /// Called for each next string.
@@ -163,18 +150,6 @@ public:
         end = end_;
     }
 
-    /// Returns the position of the argument, that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 1;
-    }
-
     /// Get the next token, if any, or return false.
     bool get(Pos & token_begin, Pos & token_end)
     {
@@ -203,25 +178,20 @@ private:
     Pos end;
 
 public:
-    /// Get the name of the function.
     static constexpr auto name = "splitByWhitespace";
     static String getName() { return name; }
 
     static bool isVariadic() { return true; }
     static size_t getNumberOfArguments() { return 0; }
 
-    /// Check the type of the function's arguments.
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (arguments.empty() || arguments.size() > 2)
-            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
-
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-                "Must be String.", arguments[0]->getName(), getName());
+        return SplitByNonAlphaImpl::checkArguments(func, arguments);
     }
 
-    /// Initialize by the function arguments.
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(1);
+
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
     /// Called for each next string.
@@ -231,18 +201,6 @@ public:
         end = end_;
     }
 
-    /// Returns the position of the argument, that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 1;
-    }
-
     /// Get the next token, if any, or return false.
     bool get(Pos & token_begin, Pos & token_end)
     {
@@ -269,7 +227,7 @@ class SplitByCharImpl
 private:
     Pos pos;
     Pos end;
-    char sep;
+    char separator;
 
 public:
     static constexpr auto name = "splitByChar";
@@ -277,23 +235,23 @@ public:
     static bool isVariadic() { return true; }
     static size_t getNumberOfArguments() { return 0; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (arguments.size() < 2 || arguments.size() > 3)
-            throw Exception(
-                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                "Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
-                name, arguments.size());
+        FunctionArgumentDescriptors mandatory_args{
+            {"separator", &isString<IDataType>, isColumnConst, "const String"},
+            {"s", &isString<IDataType>, nullptr, "String"}
+        };
 
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-                "Must be String.", arguments[0]->getName(), getName());
+        FunctionArgumentDescriptors optional_args{
+            {"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
+        };
 
-        if (!isString(arguments[1]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}. "
-                "Must be String.", arguments[1]->getName(), getName());
+        validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
     }
 
+    static constexpr auto strings_argument_position = 1uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(2);
+
     void init(const ColumnsWithTypeAndName & arguments)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
@@ -307,19 +265,7 @@ public:
         if (sep_str.size() != 1)
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());
 
-        sep = sep_str[0];
-    }
-
-    /// Returns the position of the argument, that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 1;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 2;
+        separator = sep_str[0];
     }
 
     void set(Pos pos_, Pos end_)
@@ -334,7 +280,7 @@ public:
             return false;
 
         token_begin = pos;
-        pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
+        pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
 
         if (pos)
         {
@@ -355,7 +301,7 @@ private:
     Pos pos;
     Pos end;
 
-    String sep;
+    String separator;
 
 public:
     static constexpr auto name = "splitByString";
@@ -363,11 +309,14 @@ public:
     static bool isVariadic() { return true; }
     static size_t getNumberOfArguments() { return 0; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        SplitByCharImpl::checkArguments(arguments);
+        SplitByCharImpl::checkArguments(func, arguments);
     }
 
+    static constexpr auto strings_argument_position = 1uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(2);
+
     void init(const ColumnsWithTypeAndName & arguments)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
@@ -376,19 +325,7 @@ public:
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
                 "Must be constant string.", arguments[0].column->getName(), getName());
 
-        sep = col->getValue<String>();
-    }
-
-    /// Returns the position of the argument that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 1;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 2;
+        separator = col->getValue<String>();
     }
 
     /// Called for each next string.
@@ -401,7 +338,7 @@ public:
     /// Get the next token, if any, or return false.
     bool get(Pos & token_begin, Pos & token_end)
     {
-        if (sep.empty())
+        if (separator.empty())
         {
             if (pos == end)
                 return false;
@@ -417,12 +354,12 @@ public:
 
             token_begin = pos;
 
-            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, sep.data(), sep.size()));
+            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
 
             if (pos)
             {
                 token_end = pos;
-                pos += sep.size();
+                pos += separator.size();
             }
             else
                 token_end = end;
@@ -448,13 +385,14 @@ public:
     static bool isVariadic() { return true; }
     static size_t getNumberOfArguments() { return 0; }
 
-    /// Check the type of function arguments.
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        SplitByStringImpl::checkArguments(arguments);
+        SplitByStringImpl::checkArguments(func, arguments);
     }
 
-    /// Initialize by the function arguments.
+    static constexpr auto strings_argument_position = 1uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional(2);
+
     void init(const ColumnsWithTypeAndName & arguments)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
@@ -467,18 +405,6 @@ public:
             re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
     }
 
-    /// Returns the position of the argument that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 1;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return 2;
-    }
-
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
@@ -536,13 +462,19 @@ public:
     static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 2; }
 
-    /// Check the type of function arguments.
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        SplitByStringImpl::checkArguments(arguments);
+        FunctionArgumentDescriptors mandatory_args{
+            {"haystack", &isString<IDataType>, nullptr, "String"},
+            {"pattern", &isString<IDataType>, isColumnConst, "const String"}
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
-    /// Initialize by the function arguments.
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
+
     void init(const ColumnsWithTypeAndName & arguments)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
@@ -557,18 +489,6 @@ public:
         matches.resize(capture + 1);
     }
 
-    /// Returns the position of the argument that is the column of strings
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return std::nullopt;
-    }
-
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
@@ -611,10 +531,7 @@ public:
     static constexpr auto name = Generator::name;
     static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTokens>(); }
 
-    String getName() const override
-    {
-        return name;
-    }
+    String getName() const override { return name; }
 
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
 
@@ -622,18 +539,9 @@ public:
 
     size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
 
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        Generator::checkArguments(arguments);
-
-        const auto max_substrings_pos = Generator::getMaxSubstringsArgumentPosition();
-        if (max_substrings_pos && *max_substrings_pos < arguments.size() && !isNativeInteger(arguments[*max_substrings_pos]))
-            throw Exception(
-                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-                "{}-th argument for function '{}' must be integer, got '{}' instead",
-                *max_substrings_pos + 1,
-                getName(),
-                arguments[*max_substrings_pos]->getName());
+        Generator::checkArguments(*this, arguments);
 
         return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
     }
@@ -642,22 +550,24 @@ public:
     {
         Generator generator;
         generator.init(arguments);
-        const auto & array_argument = arguments[generator.getStringsArgumentPosition()];
+
+        const auto & array_argument = arguments[generator.strings_argument_position];
 
         /// Whether we need to limit max tokens returned by Generator::get
         /// If max_substrings is std::nullopt, no limit is applied.
         auto max_substrings = getMaxSubstrings(arguments);
 
         const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
-        const ColumnConst * col_const_str =
-                checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
+        const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
 
         auto col_res = ColumnArray::create(ColumnString::create());
+
         ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
-        ColumnArray::Offsets & res_offsets = col_res->getOffsets();
         ColumnString::Chars & res_strings_chars = res_strings.getChars();
         ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
 
+        ColumnArray::Offsets & res_offsets = col_res->getOffsets();
+
         if (col_str)
         {
             const ColumnString::Chars & src_chars = col_str->getChars();
@@ -701,9 +611,9 @@ public:
 
             return col_res;
         }
-        else if (col_const_str)
+        else if (col_str_const)
         {
-            String src = col_const_str->getValue<String>();
+            String src = col_str_const->getValue<String>();
             Array dst;
 
             generator.set(src.data(), src.data() + src.size());
@@ -713,7 +623,7 @@ public:
             while (generator.get(token_begin, token_end) && !(max_substrings && dst.size() >= *max_substrings))
                 dst.push_back(String(token_begin, token_end - token_begin));
 
-            return result_type->createColumnConst(col_const_str->size(), dst);
+            return result_type->createColumnConst(col_str_const->size(), dst);
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
@@ -734,7 +644,7 @@ private:
 
     std::optional<size_t> getMaxSubstrings(const ColumnsWithTypeAndName & arguments) const
     {
-        const auto pos = Generator::getMaxSubstringsArgumentPosition();
+        const auto pos = Generator::max_substrings_argument_position;
         if (!pos)
             return std::nullopt;
 
@@ -758,7 +668,7 @@ private:
         if (max_substrings && *max_substrings <= 0)
             return std::nullopt;
 
-        return *max_substrings;
+        return max_substrings;
     }
 };
 
@@ -803,7 +713,7 @@ private:
             /// Loop through the rows within the array. /// NOTE You can do everything in one copy, if the separator has a size of 1.
             for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
             {
-                if (unlikely(null_map && null_map[current_src_array_offset]))
+                if (null_map && null_map[current_src_array_offset]) [[unlikely]]
                     continue;
 
                 if (!first_non_null)
@@ -881,7 +791,7 @@ public:
     bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
 
-    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
     {
diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp
index d3a45efb498..69819d2214f 100644
--- a/src/Functions/URL/URLHierarchy.cpp
+++ b/src/Functions/URL/URLHierarchy.cpp
@@ -23,27 +23,20 @@ public:
     static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-            "Must be String.", arguments[0]->getName(), getName());
+        FunctionArgumentDescriptors mandatory_args{
+            {"URL", &isString<IDataType>, nullptr, "String"},
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
+
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
-    /// Returns the position of the argument that is the column of rows
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return std::nullopt;
-    }
-
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp
index 3775748f6ed..2c4f4e9be5c 100644
--- a/src/Functions/URL/URLPathHierarchy.cpp
+++ b/src/Functions/URL/URLPathHierarchy.cpp
@@ -22,27 +22,20 @@ public:
     static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-            "Must be String.", arguments[0]->getName(), getName());
+        FunctionArgumentDescriptors mandatory_args{
+            {"URL", &isString<IDataType>, nullptr, "String"},
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
+
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
-    /// Returns the position of the argument that is the column of rows
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return std::nullopt;
-    }
-
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp
index 4ca2d79d22d..0e9153acf7f 100644
--- a/src/Functions/URL/extractURLParameterNames.cpp
+++ b/src/Functions/URL/extractURLParameterNames.cpp
@@ -22,25 +22,17 @@ public:
     static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-            "Must be String.", arguments[0]->getName(), getName());
-    }
-
-    /// Returns the position of the argument that is the column of rows
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return std::nullopt;
+        FunctionArgumentDescriptors mandatory_args{
+            {"URL", &isString<IDataType>, nullptr, "String"},
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp
index a44157e1b35..273edde8d18 100644
--- a/src/Functions/URL/extractURLParameters.cpp
+++ b/src/Functions/URL/extractURLParameters.cpp
@@ -22,26 +22,19 @@ public:
     static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
-    static void checkArguments(const DataTypes & arguments)
+    static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
     {
-        if (!isString(arguments[0]))
-            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
-            "Must be String.", arguments[0]->getName(), getName());
+        FunctionArgumentDescriptors mandatory_args{
+            {"URL", &isString<IDataType>, nullptr, "String"},
+        };
+
+        validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
     void init(const ColumnsWithTypeAndName & /*arguments*/) {}
 
-    /// Returns the position of the argument that is the column of rows
-    static size_t getStringsArgumentPosition()
-    {
-        return 0;
-    }
-
-    /// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
-    static std::optional<size_t> getMaxSubstringsArgumentPosition()
-    {
-        return std::nullopt;
-    }
+    static constexpr auto strings_argument_position = 0uz;
+    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)

From b5b2cc511b7171332a368895e32589d573b46185 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Mon, 11 Sep 2023 18:48:40 +0000
Subject: [PATCH 076/243] Pythonic vs Spark splitting

---
 docs/en/operations/settings/settings.md       |  11 +
 .../functions/splitting-merging-functions.md  |  12 +
 src/Core/Settings.h                           |   1 +
 src/Functions/FunctionsStringArray.cpp        |  34 +++
 src/Functions/FunctionsStringArray.h          | 269 +++++++++++++-----
 src/Functions/URL/URLHierarchy.cpp            |   3 +-
 src/Functions/URL/URLPathHierarchy.cpp        |   3 +-
 .../URL/extractURLParameterNames.cpp          |   3 +-
 src/Functions/URL/extractURLParameters.cpp    |   3 +-
 .../02876_splitby_spark_vs_python.reference   |  22 ++
 .../02876_splitby_spark_vs_python.sql         |  27 ++
 11 files changed, 314 insertions(+), 74 deletions(-)
 create mode 100644 tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
 create mode 100644 tests/queries/0_stateless/02876_splitby_spark_vs_python.sql

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e9e5920fa59..227483758d5 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4067,6 +4067,17 @@ Result:
 └─────┴─────┴───────┘
 ```
 
+## split_tokens_like_python {#split-tokens-like-python}
+
+Controls if functions [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with `max_substring` argument > 0 include the remaining string (if any) in the result array (Python semantics) or not (Spark semantics).
+
+Possible values:
+
+- 0 - Don't include the remaining string (Spark semantics).
+- 1 - Include the remaining string (Python semantics).
+
+Default value: `0`.
+
 ## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
 
 Enables or disables returning results of type:
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index c88643ef7cf..7e788a8e45b 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -38,6 +38,8 @@ The behavior of parameter `max_substrings` changed starting with ClickHouse v22.
 For example,
 - in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
 - in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
+
+The previous behavior can be restored by setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) = 1.
 :::
 
 **Example**
@@ -80,6 +82,8 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere
 - There are multiple consecutive non-empty separators;
 - The original string `s` is empty while the separator is not empty.
 
+Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+
 **Example**
 
 ``` sql
@@ -133,6 +137,8 @@ Returns an array of selected substrings. Empty substrings may be selected when:
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
+Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+
 **Example**
 
 ``` sql
@@ -182,6 +188,8 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
+Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+
 **Example**
 
 ``` sql
@@ -219,6 +227,8 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
+Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+
 **Example**
 
 ``` sql
@@ -279,6 +289,8 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
+Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+
 **Example**
 
 ``` sql
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index e8430e96115..14e99918983 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -502,6 +502,7 @@ class IColumn;
     M(Bool, reject_expensive_hyperscan_regexps, true, "Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion)", 0) \
     M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
     M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
+    M(Bool, split_tokens_like_python, false, "If true, then functions splitBy*() with given max_substring argument include remaining string in the result (Python semantics) or not (Spark semantics).", 0) \
     \
     M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
     M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp
index 4d118481bb2..51b50d793e9 100644
--- a/src/Functions/FunctionsStringArray.cpp
+++ b/src/Functions/FunctionsStringArray.cpp
@@ -9,6 +9,40 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
+template <typename DataType>
+std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument)
+{
+    const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
+    if (!col)
+        return std::nullopt;
+
+    auto value = col->template getValue<DataType>();
+    return static_cast<Int64>(value);
+}
+
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
+{
+    if (max_substrings_argument_position >= arguments.size())
+        return std::nullopt;
+
+    std::optional<Int64> max_splits;
+    if (!((max_splits = extractMaxSplitsImpl<UInt8>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int8>(arguments[max_substrings_argument_position]))
+          || (max_splits = extractMaxSplitsImpl<UInt16>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int16>(arguments[max_substrings_argument_position]))
+          || (max_splits = extractMaxSplitsImpl<UInt32>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int32>(arguments[max_substrings_argument_position]))
+          || (max_splits = extractMaxSplitsImpl<UInt64>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int64>(arguments[max_substrings_argument_position]))))
+        throw Exception(
+            ErrorCodes::ILLEGAL_COLUMN,
+            "Illegal column {}, which is {}-th argument",// of function {}",
+            arguments[max_substrings_argument_position].column->getName(),
+            max_substrings_argument_position + 1);//,
+            /// getName());
+
+    if (max_splits && *max_splits <= 0)
+        return std::nullopt;
+
+    return max_splits;
+}
+
 DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
 {
     FunctionArgumentDescriptors mandatory_args{
diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index ce78090dc6b..92eb015e6e3 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -56,6 +56,13 @@ namespace ErrorCodes
 
 using Pos = const char *;
 
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
+
+enum class SplitTokenMode
+{
+    LikeSpark,
+    LikePython
+};
 
 /// Substring generators. All of them have a common interface.
 
@@ -64,6 +71,9 @@ class SplitByAlphaImpl
 private:
     Pos pos;
     Pos end;
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
 
 public:
     static constexpr auto name = "alphaTokens";
@@ -87,15 +97,19 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(1);
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    {
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 1);
+    }
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     /// Get the next token, if any, or return false.
@@ -110,10 +124,26 @@ public:
 
         token_begin = pos;
 
+        if (max_splits && splits >= max_splits)
+        {
+            switch (split_token_mode)
+            {
+            case SplitTokenMode::LikeSpark:
+                return false;
+            case SplitTokenMode::LikePython:
+            {
+                token_end = end;
+                pos = end;
+                return true;
+            }
+            }
+        }
+
         while (pos < end && isAlphaASCII(*pos))
             ++pos;
 
         token_end = pos;
+        ++splits;
 
         return true;
     }
@@ -124,6 +154,9 @@ class SplitByNonAlphaImpl
 private:
     Pos pos;
     Pos end;
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
 
 public:
     /// Get the name of the function.
@@ -139,15 +172,19 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(1);
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    {
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 1);
+    }
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     /// Get the next token, if any, or return false.
@@ -162,10 +199,25 @@ public:
 
         token_begin = pos;
 
+        if (max_splits && splits >= max_splits)
+        {
+            switch (split_token_mode)
+            {
+            case SplitTokenMode::LikeSpark:
+                return false;
+            case SplitTokenMode::LikePython:
+            {
+                token_end = end;
+                pos = end;
+                return true;
+            }
+            }
+        }
         while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
             ++pos;
 
         token_end = pos;
+        splits++;
 
         return true;
     }
@@ -176,6 +228,9 @@ class SplitByWhitespaceImpl
 private:
     Pos pos;
     Pos end;
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
 
 public:
     static constexpr auto name = "splitByWhitespace";
@@ -190,15 +245,19 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(1);
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    {
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 1);
+    }
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     /// Get the next token, if any, or return false.
@@ -213,10 +272,26 @@ public:
 
         token_begin = pos;
 
+        if (max_splits && splits >= max_splits)
+        {
+            switch (split_token_mode)
+            {
+            case SplitTokenMode::LikeSpark:
+                return false;
+            case SplitTokenMode::LikePython:
+            {
+                token_end = end;
+                pos = end;
+                return true;
+            }
+            }
+        }
+
         while (pos < end && !isWhitespaceASCII(*pos))
             ++pos;
 
         token_end = pos;
+        splits++;
 
         return true;
     }
@@ -228,6 +303,9 @@ private:
     Pos pos;
     Pos end;
     char separator;
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
 
 public:
     static constexpr auto name = "splitByChar";
@@ -250,9 +328,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 1uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(2);
 
-    void init(const ColumnsWithTypeAndName & arguments)
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -266,12 +343,16 @@ public:
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());
 
         separator = sep_str[0];
+
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     void set(Pos pos_, Pos end_)
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     bool get(Pos & token_begin, Pos & token_end)
@@ -280,12 +361,28 @@ public:
             return false;
 
         token_begin = pos;
-        pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
 
+        if (max_splits && splits >= max_splits)
+        {
+            switch (split_token_mode)
+            {
+            case SplitTokenMode::LikeSpark:
+                return false;
+            case SplitTokenMode::LikePython:
+            {
+                token_end = end;
+                pos = nullptr;
+                return true;
+            }
+            }
+        }
+
+        pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
         if (pos)
         {
             token_end = pos;
             ++pos;
+            ++splits;
         }
         else
             token_end = end;
@@ -300,8 +397,10 @@ class SplitByStringImpl
 private:
     Pos pos;
     Pos end;
-
     String separator;
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
 
 public:
     static constexpr auto name = "splitByString";
@@ -315,9 +414,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 1uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(2);
 
-    void init(const ColumnsWithTypeAndName & arguments)
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -326,6 +424,9 @@ public:
                 "Must be constant string.", arguments[0].column->getName(), getName());
 
         separator = col->getValue<String>();
+
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     /// Called for each next string.
@@ -333,6 +434,7 @@ public:
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     /// Get the next token, if any, or return false.
@@ -344,8 +446,25 @@ public:
                 return false;
 
             token_begin = pos;
+
+            if (max_splits && splits >= max_splits)
+            {
+                switch (split_token_mode)
+                {
+                case SplitTokenMode::LikeSpark:
+                    return false;
+                case SplitTokenMode::LikePython:
+                {
+                    token_end = end;
+                    pos = end;
+                    return true;
+                }
+                }
+            }
+
             pos += 1;
             token_end = pos;
+            ++splits;
         }
         else
         {
@@ -354,8 +473,22 @@ public:
 
             token_begin = pos;
 
-            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
+            if (max_splits && splits >= max_splits)
+            {
+                switch (split_token_mode)
+                {
+                case SplitTokenMode::LikeSpark:
+                    return false;
+                case SplitTokenMode::LikePython:
+                {
+                    token_end = end;
+                    pos = nullptr;
+                    return true;
+                }
+                }
+            }
 
+            pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
             if (pos)
             {
                 token_end = pos;
@@ -363,6 +496,7 @@ public:
             }
             else
                 token_end = end;
+            ++splits;
         }
 
         return true;
@@ -378,6 +512,10 @@ private:
     Pos pos;
     Pos end;
 
+    std::optional<size_t> max_splits;
+    size_t splits;
+    SplitTokenMode split_token_mode;
+
 public:
     static constexpr auto name = "splitByRegexp";
     static String getName() { return name; }
@@ -391,9 +529,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 1uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional(2);
 
-    void init(const ColumnsWithTypeAndName & arguments)
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -403,6 +540,9 @@ public:
 
         if (!col->getValue<String>().empty())
             re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
+
+        split_token_mode = split_token_mode_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     /// Called for each next string.
@@ -410,6 +550,7 @@ public:
     {
         pos = pos_;
         end = end_;
+        splits = 0;
     }
 
     /// Get the next token, if any, or return false.
@@ -421,8 +562,25 @@ public:
                 return false;
 
             token_begin = pos;
+
+            if (max_splits && splits >= max_splits)
+            {
+                switch (split_token_mode)
+                {
+                case SplitTokenMode::LikeSpark:
+                    return false;
+                case SplitTokenMode::LikePython:
+                {
+                    token_end = end;
+                    pos = end;
+                    return true;
+                }
+                }
+            }
+
             pos += 1;
             token_end = pos;
+            ++splits;
         }
         else
         {
@@ -431,6 +589,21 @@ public:
 
             token_begin = pos;
 
+            if (max_splits && splits >= max_splits)
+            {
+                switch (split_token_mode)
+                {
+                case SplitTokenMode::LikeSpark:
+                    return false;
+                case SplitTokenMode::LikePython:
+                {
+                    token_end = end;
+                    pos = nullptr;
+                    return true;
+                }
+                }
+            }
+
             if (!re->match(pos, end - pos, matches) || !matches[0].length)
             {
                 token_end = end;
@@ -441,6 +614,7 @@ public:
                 token_end = pos + matches[0].offset;
                 pos = token_end + matches[0].length;
             }
+            ++splits;
         }
 
         return true;
@@ -473,9 +647,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
-    void init(const ColumnsWithTypeAndName & arguments)
+    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode /*split_token_mode*/)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
 
@@ -527,9 +700,18 @@ public:
 template <typename Generator>
 class FunctionTokens : public IFunction
 {
+private:
+    SplitTokenMode split_token_mode;
+
 public:
     static constexpr auto name = Generator::name;
-    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTokens>(); }
+    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
+
+    explicit FunctionTokens<Generator>(ContextPtr context)
+    {
+        const Settings & settings = context->getSettingsRef();
+        split_token_mode = settings.split_tokens_like_python ? SplitTokenMode::LikePython : SplitTokenMode::LikeSpark;
+    }
 
     String getName() const override { return name; }
 
@@ -549,14 +731,10 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
     {
         Generator generator;
-        generator.init(arguments);
+        generator.init(arguments, split_token_mode);
 
         const auto & array_argument = arguments[generator.strings_argument_position];
 
-        /// Whether we need to limit max tokens returned by Generator::get
-        /// If max_substrings is std::nullopt, no limit is applied.
-        auto max_substrings = getMaxSubstrings(arguments);
-
         const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
         const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
 
@@ -592,7 +770,7 @@ public:
 
                 generator.set(pos, end);
                 size_t j = 0;
-                while (generator.get(token_begin, token_end) && !(max_substrings && j >= *max_substrings))
+                while (generator.get(token_begin, token_end))
                 {
                     size_t token_size = token_end - token_begin;
 
@@ -620,7 +798,7 @@ public:
             Pos token_begin = nullptr;
             Pos token_end = nullptr;
 
-            while (generator.get(token_begin, token_end) && !(max_substrings && dst.size() >= *max_substrings))
+            while (generator.get(token_begin, token_end))
                 dst.push_back(String(token_begin, token_end - token_begin));
 
             return result_type->createColumnConst(col_str_const->size(), dst);
@@ -629,47 +807,6 @@ public:
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
                     array_argument.column->getName(), array_argument.column->getName(), getName());
     }
-
-private:
-    template <typename DataType>
-    std::optional<Int64> getMaxSubstringsImpl(const ColumnWithTypeAndName & argument) const
-    {
-        const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
-        if (!col)
-            return {};
-
-        auto value = col->template getValue<DataType>();
-        return static_cast<Int64>(value);
-    }
-
-    std::optional<size_t> getMaxSubstrings(const ColumnsWithTypeAndName & arguments) const
-    {
-        const auto pos = Generator::max_substrings_argument_position;
-        if (!pos)
-            return std::nullopt;
-
-        if (*pos >= arguments.size())
-            return std::nullopt;
-
-        std::optional<Int64> max_substrings;
-        if (!((max_substrings = getMaxSubstringsImpl<UInt8>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int8>(arguments[*pos]))
-              || (max_substrings = getMaxSubstringsImpl<UInt16>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int16>(arguments[*pos]))
-              || (max_substrings = getMaxSubstringsImpl<UInt32>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int32>(arguments[*pos]))
-              || (max_substrings = getMaxSubstringsImpl<UInt64>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int64>(arguments[*pos]))))
-            throw Exception(
-                ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {}, which is {}-th argument of function {}",
-                arguments[*pos].column->getName(),
-                *pos + 1,
-                getName());
-
-        /// If max_substrings is negative or zero, tokenize will be applied as many times as possible, which is equivalent to
-        /// no max_substrings argument in function
-        if (max_substrings && *max_substrings <= 0)
-            return std::nullopt;
-
-        return max_substrings;
-    }
 };
 
 
diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp
index 69819d2214f..bce876f735f 100644
--- a/src/Functions/URL/URLHierarchy.cpp
+++ b/src/Functions/URL/URLHierarchy.cpp
@@ -33,9 +33,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp
index 2c4f4e9be5c..b44144a5358 100644
--- a/src/Functions/URL/URLPathHierarchy.cpp
+++ b/src/Functions/URL/URLPathHierarchy.cpp
@@ -32,9 +32,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp
index 0e9153acf7f..785ed050d15 100644
--- a/src/Functions/URL/extractURLParameterNames.cpp
+++ b/src/Functions/URL/extractURLParameterNames.cpp
@@ -32,9 +32,8 @@ public:
     }
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp
index 273edde8d18..c21ced2a3aa 100644
--- a/src/Functions/URL/extractURLParameters.cpp
+++ b/src/Functions/URL/extractURLParameters.cpp
@@ -31,10 +31,9 @@ public:
         validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
 
     static constexpr auto strings_argument_position = 0uz;
-    static constexpr auto max_substrings_argument_position = std::make_optional<size_t>();
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference b/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
new file mode 100644
index 00000000000..0c73fd7de76
--- /dev/null
+++ b/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
@@ -0,0 +1,22 @@
+splitByAlpha
+['ab','cd']
+['ab','cd','ef.gh']
+splitByNonAlpha
+['128','0']
+['128','0','0.1']
+splitByWhitespace
+['Nein,','nein,']
+['Nein,','nein,','nein! Doch!']
+splitByChar
+['a','b']
+['a','b','c=d']
+splitByString
+['a','=']
+['a','=','=b==c==d']
+['a','b']
+['a','b','c==d']
+splitByRegexp
+['a','1']
+['a','1','2bc23de345f']
+['a','bc']
+['a','bc','de345f']
diff --git a/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql b/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql
new file mode 100644
index 00000000000..c550f69bd0c
--- /dev/null
+++ b/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql
@@ -0,0 +1,27 @@
+SELECT 'splitByAlpha';
+SELECT splitByAlpha('ab.cd.ef.gh', 2) settings split_tokens_like_python = 0;
+SELECT splitByAlpha('ab.cd.ef.gh', 2) settings split_tokens_like_python = 1;
+
+SELECT 'splitByNonAlpha';
+SELECT splitByNonAlpha('128.0.0.1', 2) settings split_tokens_like_python = 0;
+SELECT splitByNonAlpha('128.0.0.1', 2) settings split_tokens_like_python = 1;
+
+SELECT 'splitByWhitespace';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) settings split_tokens_like_python = 0;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) settings split_tokens_like_python = 1;
+
+SELECT 'splitByChar';
+SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS split_tokens_like_python = 0;
+SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS split_tokens_like_python = 1;
+
+SELECT 'splitByString';
+SELECT splitByString('', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 0;
+SELECT splitByString('', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 1;
+SELECT splitByString('==', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 0;
+SELECT splitByString('==', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 1;
+
+SELECT 'splitByRegexp';
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 0;
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 0;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 1;

From 2a5aa289e85707c8fa8b8363a3fdc7fe3b1c0ba4 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 13 Sep 2023 12:03:17 +0000
Subject: [PATCH 077/243] Implement true Python/Spark split behavior

---
 docs/en/operations/settings/settings.md       |  11 +-
 .../functions/splitting-merging-functions.md  |  12 +-
 src/Core/Settings.h                           |   2 +-
 src/Functions/FunctionsStringArray.cpp        |  27 +-
 src/Functions/FunctionsStringArray.h          | 382 ++++++++++++------
 src/Functions/URL/URLHierarchy.cpp            |   2 +-
 src/Functions/URL/URLPathHierarchy.cpp        |   2 +-
 .../URL/extractURLParameterNames.cpp          |   2 +-
 src/Functions/URL/extractURLParameters.cpp    |   2 +-
 ...6_splitby_max_substring_behavior.reference | 126 ++++++
 .../02876_splitby_max_substring_behavior.sql  | 151 +++++++
 .../02876_splitby_spark_vs_python.reference   |  22 -
 .../02876_splitby_spark_vs_python.sql         |  27 --
 13 files changed, 584 insertions(+), 184 deletions(-)
 create mode 100644 tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
 create mode 100644 tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql
 delete mode 100644 tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
 delete mode 100644 tests/queries/0_stateless/02876_splitby_spark_vs_python.sql

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 227483758d5..ad1437ea3eb 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4067,16 +4067,17 @@ Result:
 └─────┴─────┴───────┘
 ```
 
-## split_tokens_like_python {#split-tokens-like-python}
+## splitby_max_substring_behavior {#splitby-max-substring-behavior}
 
-Controls if functions [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with `max_substring` argument > 0 include the remaining string (if any) in the result array (Python semantics) or not (Spark semantics).
+Controls how functions [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with given `max_substring` argument behave.
 
 Possible values:
 
-- 0 - Don't include the remaining string (Spark semantics).
-- 1 - Include the remaining string (Python semantics).
+- `''` - If `max_substring` >=1, return the first `max_substring`-many splits.
+- `'python'` - If `max_substring` >= 0, split `max_substring`-many times, and return `max_substring + 1` elements where the last element contains the remaining string.
+- `'spark'` - If `max_substring` >= 1, split `max_substring`-many times, and return `max_substring + 1` elements where the last element contains the remaining string.
 
-Default value: `0`.
+Default value: ``.
 
 ## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
 
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 7e788a8e45b..1e0bc3da664 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -39,7 +39,7 @@ For example,
 - in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
 - in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
 
-The previous behavior can be restored by setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) = 1.
+The previous behavior can be restored by setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) = 'python'.
 :::
 
 **Example**
@@ -82,7 +82,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere
 - There are multiple consecutive non-empty separators;
 - The original string `s` is empty while the separator is not empty.
 
-Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
 
 **Example**
 
@@ -137,7 +137,7 @@ Returns an array of selected substrings. Empty substrings may be selected when:
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
 
 **Example**
 
@@ -188,7 +188,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
 
 **Example**
 
@@ -227,7 +227,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
 
 **Example**
 
@@ -289,7 +289,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [split_tokens_like_python](../../operations/settings/settings.md#split-tokens-like-python) (default: 0) controls whether with `max_substrings` > 0, the remaining string (if any) is included in the result array or not.
+Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
 
 **Example**
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 14e99918983..ca8f82ed8b6 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -502,7 +502,7 @@ class IColumn;
     M(Bool, reject_expensive_hyperscan_regexps, true, "Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion)", 0) \
     M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
     M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
-    M(Bool, split_tokens_like_python, false, "If true, then functions splitBy*() with given max_substring argument include remaining string in the result (Python semantics) or not (Spark semantics).", 0) \
+    M(String, splitby_max_substring_behavior, "", "Control the behavior of the 'max_substring' argument in functions splitBy*(): '' (default), 'python' or 'spark'", 0) \
     \
     M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
     M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp
index 51b50d793e9..085cb2c8eae 100644
--- a/src/Functions/FunctionsStringArray.cpp
+++ b/src/Functions/FunctionsStringArray.cpp
@@ -20,7 +20,7 @@ std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument
     return static_cast<Int64>(value);
 }
 
-std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position, MaxSubstringBehavior max_substring_behavior)
 {
     if (max_substrings_argument_position >= arguments.size())
         return std::nullopt;
@@ -32,13 +32,28 @@ std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments,
           || (max_splits = extractMaxSplitsImpl<UInt64>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int64>(arguments[max_substrings_argument_position]))))
         throw Exception(
             ErrorCodes::ILLEGAL_COLUMN,
-            "Illegal column {}, which is {}-th argument",// of function {}",
+            "Illegal column {}, which is {}-th argument",
             arguments[max_substrings_argument_position].column->getName(),
-            max_substrings_argument_position + 1);//,
-            /// getName());
+            max_substrings_argument_position + 1);
+
+    if (max_splits)
+        switch (max_substring_behavior)
+        {
+            case MaxSubstringBehavior::LikeClickHouse:
+            case MaxSubstringBehavior::LikeSpark:
+            {
+                if (*max_splits <= 0)
+                    return std::nullopt;
+                break;
+            }
+            case MaxSubstringBehavior::LikePython:
+            {
+                if (*max_splits < 0)
+                    return std::nullopt;
+                break;
+            }
+        }
 
-    if (max_splits && *max_splits <= 0)
-        return std::nullopt;
 
     return max_splits;
 }
diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index 92eb015e6e3..8b732292a1c 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -56,14 +56,15 @@ namespace ErrorCodes
 
 using Pos = const char *;
 
-std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
-
-enum class SplitTokenMode
+enum class MaxSubstringBehavior
 {
+    LikeClickHouse,
     LikeSpark,
     LikePython
 };
 
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position, MaxSubstringBehavior max_substring_behavior);
+
 /// Substring generators. All of them have a common interface.
 
 class SplitByAlphaImpl
@@ -73,7 +74,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = "alphaTokens";
@@ -98,10 +99,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 1);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
     }
 
     /// Called for each next string.
@@ -124,18 +125,36 @@ public:
 
         token_begin = pos;
 
-        if (max_splits && splits >= max_splits)
+        if (max_splits)
         {
-            switch (split_token_mode)
+            switch (max_substring_behavior)
             {
-            case SplitTokenMode::LikeSpark:
-                return false;
-            case SplitTokenMode::LikePython:
-            {
-                token_end = end;
-                pos = end;
-                return true;
-            }
+                case MaxSubstringBehavior::LikeClickHouse:
+                {
+                    if (splits == *max_splits)
+                        return false;
+                    break;
+                }
+                case MaxSubstringBehavior::LikeSpark:
+                {
+                    if (splits == *max_splits - 1)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
+                case MaxSubstringBehavior::LikePython:
+                {
+                    if (splits == *max_splits)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
             }
         }
 
@@ -156,7 +175,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     /// Get the name of the function.
@@ -173,10 +192,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 1);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
     }
 
     /// Called for each next string.
@@ -199,20 +218,39 @@ public:
 
         token_begin = pos;
 
-        if (max_splits && splits >= max_splits)
+        if (max_splits)
         {
-            switch (split_token_mode)
+            switch (max_substring_behavior)
             {
-            case SplitTokenMode::LikeSpark:
-                return false;
-            case SplitTokenMode::LikePython:
-            {
-                token_end = end;
-                pos = end;
-                return true;
-            }
+                case MaxSubstringBehavior::LikeClickHouse:
+                {
+                    if (splits == *max_splits)
+                        return false;
+                    break;
+                }
+                case MaxSubstringBehavior::LikeSpark:
+                {
+                    if (splits == *max_splits - 1)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
+                case MaxSubstringBehavior::LikePython:
+                {
+                    if (splits == *max_splits)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
             }
         }
+
         while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
             ++pos;
 
@@ -230,7 +268,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = "splitByWhitespace";
@@ -246,10 +284,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 1);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
     }
 
     /// Called for each next string.
@@ -272,18 +310,36 @@ public:
 
         token_begin = pos;
 
-        if (max_splits && splits >= max_splits)
+        if (max_splits)
         {
-            switch (split_token_mode)
+            switch (max_substring_behavior)
             {
-            case SplitTokenMode::LikeSpark:
-                return false;
-            case SplitTokenMode::LikePython:
-            {
-                token_end = end;
-                pos = end;
-                return true;
-            }
+                case MaxSubstringBehavior::LikeClickHouse:
+                {
+                    if (splits == *max_splits)
+                        return false;
+                    break;
+                }
+                case MaxSubstringBehavior::LikeSpark:
+                {
+                    if (splits == *max_splits - 1)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
+                case MaxSubstringBehavior::LikePython:
+                {
+                    if (splits == *max_splits)
+                    {
+                        token_end = end;
+                        pos = end;
+                        return true;
+                    }
+                    break;
+                }
             }
         }
 
@@ -305,7 +361,7 @@ private:
     char separator;
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = "splitByChar";
@@ -329,7 +385,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -344,8 +400,8 @@ public:
 
         separator = sep_str[0];
 
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 2);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
     }
 
     void set(Pos pos_, Pos end_)
@@ -362,18 +418,36 @@ public:
 
         token_begin = pos;
 
-        if (max_splits && splits >= max_splits)
+        if (max_splits)
         {
-            switch (split_token_mode)
+            switch (max_substring_behavior)
             {
-            case SplitTokenMode::LikeSpark:
-                return false;
-            case SplitTokenMode::LikePython:
-            {
-                token_end = end;
-                pos = nullptr;
-                return true;
-            }
+                case MaxSubstringBehavior::LikeClickHouse:
+                {
+                    if (splits == *max_splits)
+                        return false;
+                    break;
+                }
+                case MaxSubstringBehavior::LikeSpark:
+                {
+                    if (splits == *max_splits - 1)
+                    {
+                        token_end = end;
+                        pos = nullptr;
+                        return true;
+                    }
+                    break;
+                }
+                case MaxSubstringBehavior::LikePython:
+                {
+                    if (splits == *max_splits)
+                    {
+                        token_end = end;
+                        pos = nullptr;
+                        return true;
+                    }
+                    break;
+                }
             }
         }
 
@@ -400,7 +474,7 @@ private:
     String separator;
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = "splitByString";
@@ -415,7 +489,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -425,8 +499,8 @@ public:
 
         separator = col->getValue<String>();
 
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 2);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
     }
 
     /// Called for each next string.
@@ -447,18 +521,36 @@ public:
 
             token_begin = pos;
 
-            if (max_splits && splits >= max_splits)
+            if (max_splits)
             {
-                switch (split_token_mode)
+                switch (max_substring_behavior)
                 {
-                case SplitTokenMode::LikeSpark:
-                    return false;
-                case SplitTokenMode::LikePython:
-                {
-                    token_end = end;
-                    pos = end;
-                    return true;
-                }
+                    case MaxSubstringBehavior::LikeClickHouse:
+                    {
+                        if (splits == *max_splits)
+                            return false;
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikeSpark:
+                    {
+                        if (splits == *max_splits - 1)
+                        {
+                            token_end = end;
+                            pos = end;
+                            return true;
+                        }
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikePython:
+                    {
+                        if (splits == *max_splits)
+                        {
+                            token_end = end;
+                            pos = end;
+                            return true;
+                        }
+                        break;
+                    }
                 }
             }
 
@@ -473,18 +565,36 @@ public:
 
             token_begin = pos;
 
-            if (max_splits && splits >= max_splits)
+            if (max_splits)
             {
-                switch (split_token_mode)
+                switch (max_substring_behavior)
                 {
-                case SplitTokenMode::LikeSpark:
-                    return false;
-                case SplitTokenMode::LikePython:
-                {
-                    token_end = end;
-                    pos = nullptr;
-                    return true;
-                }
+                    case MaxSubstringBehavior::LikeClickHouse:
+                    {
+                        if (splits == *max_splits)
+                            return false;
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikeSpark:
+                    {
+                        if (splits == *max_splits - 1)
+                        {
+                            token_end = end;
+                            pos = nullptr;
+                            return true;
+                        }
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikePython:
+                    {
+                        if (splits == *max_splits)
+                        {
+                            token_end = end;
+                            pos = nullptr;
+                            return true;
+                        }
+                        break;
+                    }
                 }
             }
 
@@ -493,10 +603,10 @@ public:
             {
                 token_end = pos;
                 pos += separator.size();
+                ++splits;
             }
             else
                 token_end = end;
-            ++splits;
         }
 
         return true;
@@ -514,7 +624,7 @@ private:
 
     std::optional<size_t> max_splits;
     size_t splits;
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = "splitByRegexp";
@@ -530,7 +640,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode split_token_mode_)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -541,8 +651,8 @@ public:
         if (!col->getValue<String>().empty())
             re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
 
-        split_token_mode = split_token_mode_;
-        max_splits = extractMaxSplits(arguments, 2);
+        max_substring_behavior = max_substring_behavior_;
+        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
     }
 
     /// Called for each next string.
@@ -563,18 +673,36 @@ public:
 
             token_begin = pos;
 
-            if (max_splits && splits >= max_splits)
+            if (max_splits)
             {
-                switch (split_token_mode)
+                switch (max_substring_behavior)
                 {
-                case SplitTokenMode::LikeSpark:
-                    return false;
-                case SplitTokenMode::LikePython:
-                {
-                    token_end = end;
-                    pos = end;
-                    return true;
-                }
+                    case MaxSubstringBehavior::LikeClickHouse:
+                    {
+                        if (splits == *max_splits)
+                            return false;
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikeSpark:
+                    {
+                        if (splits == *max_splits - 1)
+                        {
+                            token_end = end;
+                            pos = end;
+                            return true;
+                        }
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikePython:
+                    {
+                        if (splits == *max_splits)
+                        {
+                            token_end = end;
+                            pos = end;
+                            return true;
+                        }
+                        break;
+                    }
                 }
             }
 
@@ -589,18 +717,36 @@ public:
 
             token_begin = pos;
 
-            if (max_splits && splits >= max_splits)
+            if (max_splits)
             {
-                switch (split_token_mode)
+                switch (max_substring_behavior)
                 {
-                case SplitTokenMode::LikeSpark:
-                    return false;
-                case SplitTokenMode::LikePython:
-                {
-                    token_end = end;
-                    pos = nullptr;
-                    return true;
-                }
+                    case MaxSubstringBehavior::LikeClickHouse:
+                    {
+                        if (splits == *max_splits)
+                            return false;
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikeSpark:
+                    {
+                        if (splits == *max_splits - 1)
+                        {
+                            token_end = end;
+                            pos = nullptr;
+                            return true;
+                        }
+                        break;
+                    }
+                    case MaxSubstringBehavior::LikePython:
+                    {
+                        if (splits == *max_splits)
+                        {
+                            token_end = end;
+                            pos = nullptr;
+                            return true;
+                        }
+                        break;
+                    }
                 }
             }
 
@@ -613,8 +759,8 @@ public:
             {
                 token_end = pos + matches[0].offset;
                 pos = token_end + matches[0].length;
+                ++splits;
             }
-            ++splits;
         }
 
         return true;
@@ -648,7 +794,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, SplitTokenMode /*split_token_mode*/)
+    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior /*max_substring_behavior*/)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
 
@@ -701,7 +847,7 @@ template <typename Generator>
 class FunctionTokens : public IFunction
 {
 private:
-    SplitTokenMode split_token_mode;
+    MaxSubstringBehavior max_substring_behavior;
 
 public:
     static constexpr auto name = Generator::name;
@@ -710,7 +856,17 @@ public:
     explicit FunctionTokens<Generator>(ContextPtr context)
     {
         const Settings & settings = context->getSettingsRef();
-        split_token_mode = settings.split_tokens_like_python ? SplitTokenMode::LikePython : SplitTokenMode::LikeSpark;
+        if (settings.splitby_max_substring_behavior.value == "")
+            max_substring_behavior = MaxSubstringBehavior::LikeClickHouse;
+        else if (settings.splitby_max_substring_behavior.value == "python")
+            max_substring_behavior = MaxSubstringBehavior::LikePython;
+        else if (settings.splitby_max_substring_behavior.value == "spark")
+            max_substring_behavior = MaxSubstringBehavior::LikeSpark;
+        else
+            throw Exception(
+                ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal value {} for setting splitby_max_substring_behavior in function {}, must be '', 'python' or 'spark'",
+                settings.splitby_max_substring_behavior.value, getName());
     }
 
     String getName() const override { return name; }
@@ -731,7 +887,7 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
     {
         Generator generator;
-        generator.init(arguments, split_token_mode);
+        generator.init(arguments, max_substring_behavior);
 
         const auto & array_argument = arguments[generator.strings_argument_position];
 
diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp
index bce876f735f..5bd5629992f 100644
--- a/src/Functions/URL/URLHierarchy.cpp
+++ b/src/Functions/URL/URLHierarchy.cpp
@@ -34,7 +34,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp
index b44144a5358..714f56ece36 100644
--- a/src/Functions/URL/URLPathHierarchy.cpp
+++ b/src/Functions/URL/URLPathHierarchy.cpp
@@ -33,7 +33,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp
index 785ed050d15..3d40013335a 100644
--- a/src/Functions/URL/extractURLParameterNames.cpp
+++ b/src/Functions/URL/extractURLParameterNames.cpp
@@ -33,7 +33,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp
index c21ced2a3aa..82df7888196 100644
--- a/src/Functions/URL/extractURLParameters.cpp
+++ b/src/Functions/URL/extractURLParameters.cpp
@@ -31,7 +31,7 @@ public:
         validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, SplitTokenMode /*split_token_mode*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
 
     static constexpr auto strings_argument_position = 0uz;
 
diff --git a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
new file mode 100644
index 00000000000..9966c7d090e
--- /dev/null
+++ b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
@@ -0,0 +1,126 @@
+-- splitByAlpha
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab']
+['ab','cd']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab.cd.ef.gh']
+['ab','cd.ef.gh']
+['ab','cd','ef.gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab.cd.ef.gh']
+['ab','cd.ef.gh']
+-- splitByNonAlpha
+['128','0','0','1']
+['128','0','0','1']
+['128','0','0','1']
+['128']
+['128','0']
+['128','0','0','1']
+['128','0','0','1']
+['128.0.0.1']
+['128','0.0.1']
+['128','0','0.1']
+['128','0','0','1']
+['128','0','0','1']
+['128','0','0','1']
+['128.0.0.1']
+['128','0.0.1']
+-- splitByWhitespace
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,']
+['Nein,','nein,']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein, nein, nein! Doch!']
+['Nein,','nein, nein! Doch!']
+['Nein,','nein,','nein! Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein, nein, nein! Doch!']
+['Nein,','nein, nein! Doch!']
+-- splitByChar
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a']
+['a','']
+['a','','b','c','d']
+['a','','b','c','d']
+['a==b=c=d']
+['a','=b=c=d']
+['a','','b=c=d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a==b=c=d']
+['a','=b=c=d']
+-- splitByString
+['a','b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a']
+['a','b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a==b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a','b=c=d']
+['a==b=c=d']
+['a','b=c=d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a']
+['a','=']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a==b=c=d']
+['a','==b=c=d']
+['a','=','=b=c=d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a==b=c=d']
+['a','==b=c=d']
+-- splitByRegexp
+['a','bc','de','f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a']
+['a','bc']
+['a','bc','de','f']
+['a','bc','de','f']
+['a12bc23de345f']
+['a','bc23de345f']
+['a','bc','de345f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a12bc23de345f']
+['a','bc23de345f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a']
+['a','1']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a12bc23de345f']
+['a','12bc23de345f']
+['a','1','2bc23de345f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a12bc23de345f']
+['a','12bc23de345f']
diff --git a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql
new file mode 100644
index 00000000000..1dcad65f09b
--- /dev/null
+++ b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql
@@ -0,0 +1,151 @@
+SELECT '-- splitByAlpha';
+SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT '-- splitByNonAlpha';
+SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT '-- splitByWhitespace';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT '-- splitByChar';
+SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT '-- splitByString';
+
+SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT '-- splitByRegexp';
+
+SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'spark';
+
+SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = '';
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = '';
+
+SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'python';
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'python';
+
+SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'spark';
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'spark';
diff --git a/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference b/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
deleted file mode 100644
index 0c73fd7de76..00000000000
--- a/tests/queries/0_stateless/02876_splitby_spark_vs_python.reference
+++ /dev/null
@@ -1,22 +0,0 @@
-splitByAlpha
-['ab','cd']
-['ab','cd','ef.gh']
-splitByNonAlpha
-['128','0']
-['128','0','0.1']
-splitByWhitespace
-['Nein,','nein,']
-['Nein,','nein,','nein! Doch!']
-splitByChar
-['a','b']
-['a','b','c=d']
-splitByString
-['a','=']
-['a','=','=b==c==d']
-['a','b']
-['a','b','c==d']
-splitByRegexp
-['a','1']
-['a','1','2bc23de345f']
-['a','bc']
-['a','bc','de345f']
diff --git a/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql b/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql
deleted file mode 100644
index c550f69bd0c..00000000000
--- a/tests/queries/0_stateless/02876_splitby_spark_vs_python.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-SELECT 'splitByAlpha';
-SELECT splitByAlpha('ab.cd.ef.gh', 2) settings split_tokens_like_python = 0;
-SELECT splitByAlpha('ab.cd.ef.gh', 2) settings split_tokens_like_python = 1;
-
-SELECT 'splitByNonAlpha';
-SELECT splitByNonAlpha('128.0.0.1', 2) settings split_tokens_like_python = 0;
-SELECT splitByNonAlpha('128.0.0.1', 2) settings split_tokens_like_python = 1;
-
-SELECT 'splitByWhitespace';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) settings split_tokens_like_python = 0;
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) settings split_tokens_like_python = 1;
-
-SELECT 'splitByChar';
-SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS split_tokens_like_python = 0;
-SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS split_tokens_like_python = 1;
-
-SELECT 'splitByString';
-SELECT splitByString('', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 0;
-SELECT splitByString('', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 1;
-SELECT splitByString('==', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 0;
-SELECT splitByString('==', 'a==b==c==d', 2) SETTINGS split_tokens_like_python = 1;
-
-SELECT 'splitByRegexp';
-SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 0;
-SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 1;
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 0;
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS split_tokens_like_python = 1;

From cf12563df18bad5cf934b851fafb8e3ae07e9bcd Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 13 Sep 2023 12:55:17 +0000
Subject: [PATCH 078/243] Fix style check

---
 src/Functions/FunctionsStringArray.cpp         | 3 +--
 src/Functions/FunctionsStringArray.h           | 2 --
 src/Functions/URL/URLHierarchy.cpp             | 4 ----
 src/Functions/URL/URLPathHierarchy.cpp         | 4 ----
 src/Functions/URL/extractURLParameterNames.cpp | 4 ----
 src/Functions/URL/extractURLParameters.cpp     | 4 ----
 6 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp
index 085cb2c8eae..326651c111d 100644
--- a/src/Functions/FunctionsStringArray.cpp
+++ b/src/Functions/FunctionsStringArray.cpp
@@ -5,8 +5,7 @@ namespace DB
 {
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_COLUMN;
 }
 
 template <typename DataType>
diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index 8b732292a1c..e720fc96e52 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -23,10 +23,8 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int BAD_ARGUMENTS;
     extern const int ILLEGAL_COLUMN;
-    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
 
diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp
index 5bd5629992f..260053dc401 100644
--- a/src/Functions/URL/URLHierarchy.cpp
+++ b/src/Functions/URL/URLHierarchy.cpp
@@ -3,10 +3,6 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
 
 class URLPathHierarchyImpl
 {
diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp
index 714f56ece36..a11be358a70 100644
--- a/src/Functions/URL/URLPathHierarchy.cpp
+++ b/src/Functions/URL/URLPathHierarchy.cpp
@@ -3,10 +3,6 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
 
 class URLHierarchyImpl
 {
diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp
index 3d40013335a..2b79be07cae 100644
--- a/src/Functions/URL/extractURLParameterNames.cpp
+++ b/src/Functions/URL/extractURLParameterNames.cpp
@@ -3,10 +3,6 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
 
 class ExtractURLParameterNamesImpl
 {
diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp
index 82df7888196..271e5dc89c9 100644
--- a/src/Functions/URL/extractURLParameters.cpp
+++ b/src/Functions/URL/extractURLParameters.cpp
@@ -3,10 +3,6 @@
 
 namespace DB
 {
-namespace ErrorCodes
-{
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
 
 class ExtractURLParametersImpl
 {

From 40e272521b2cf3cbad3c6a303d2a08a3a1126cc6 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 13 Sep 2023 12:55:51 +0000
Subject: [PATCH 079/243] Fix spell check

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index bcb971951e5..b2818a50a57 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2215,6 +2215,7 @@ sparkBar
 sparkbar
 sparsehash
 speedscope
+splitby
 splitByChar
 splitByNonAlpha
 splitByRegexp

From b583b80733b7409aa141742a3867291b514a64ca Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 14 Sep 2023 10:23:40 +0000
Subject: [PATCH 080/243] Fix spelling

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index b2818a50a57..b3084e8f298 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -2215,6 +2215,7 @@ sparkBar
 sparkbar
 sparsehash
 speedscope
+splitBy
 splitby
 splitByChar
 splitByNonAlpha

From 32c5aee1c34b68cd992b0fb81565caa7fc702e4e Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Mon, 18 Sep 2023 16:31:36 +0800
Subject: [PATCH 081/243] fix ut

---
 ...5_array_join_with_max_block_size.reference | 176 ++++++++++--------
 .../02865_array_join_with_max_block_size.sql  |  16 +-
 2 files changed, 108 insertions(+), 84 deletions(-)

diff --git a/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference b/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
index 9ea48287e78..29f4a91cc22 100644
--- a/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
+++ b/tests/queries/0_stateless/02865_array_join_with_max_block_size.reference
@@ -1,81 +1,103 @@
 -- { echoOn }
 set max_block_size = 10, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-0	100000
-1	100000
-2	100000
-3	100000
-4	100000
-5	100000
-6	100000
-7	100000
-8	100000
-9	100000
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
-0	5004001
-1	5003000
-2	5002000
-3	5001000
-4	5000000
-5	4999000
-6	4998000
-7	4997000
-8	4996000
-9	4995000
-SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(10000)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
-0	10006000
-1	1000
-2	10004000
-3	1000
-4	10002000
-5	1000
-6	10000000
-7	1000
-8	9998000
-9	1000
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+0	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	50401
+1	50300
+2	50200
+3	50100
+4	50000
+5	49900
+6	49800
+7	49700
+8	49600
+9	49500
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+0	1060
+1	10
+2	1040
+3	10
+4	1020
+5	10
+6	1000
+7	10
+8	980
+9	10
 set max_block_size = 1000, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-0	100000
-1	100000
-2	100000
-3	100000
-4	100000
-5	100000
-6	100000
-7	100000
-8	100000
-9	100000
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
-0	5004001
-1	5003000
-2	5002000
-3	5001000
-4	5000000
-5	4999000
-6	4998000
-7	4997000
-8	4996000
-9	4995000
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+0	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	50401
+1	50300
+2	50200
+3	50100
+4	50000
+5	49900
+6	49800
+7	49700
+8	49600
+9	49500
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+0	1060
+1	10
+2	1040
+3	10
+4	1020
+5	10
+6	1000
+7	10
+8	980
+9	10
 set max_block_size = 100000, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-0	100000
-1	100000
-2	100000
-3	100000
-4	100000
-5	100000
-6	100000
-7	100000
-8	100000
-9	100000
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
-0	5004001
-1	5003000
-2	5002000
-3	5001000
-4	5000000
-5	4999000
-6	4998000
-7	4997000
-8	4996000
-9	4995000
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+0	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+0	50401
+1	50300
+2	50200
+3	50100
+4	50000
+5	49900
+6	49800
+7	49700
+8	49600
+9	49500
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+0	1060
+1	10
+2	1040
+3	10
+4	1020
+5	10
+6	1000
+7	10
+8	980
+9	10
diff --git a/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql b/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql
index fe393d6ccd6..bc82978262a 100644
--- a/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql
+++ b/tests/queries/0_stateless/02865_array_join_with_max_block_size.sql
@@ -1,14 +1,16 @@
 -- { echoOn }
 set max_block_size = 10, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
-SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(10000)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
 
 set max_block_size = 1000, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
 
 set max_block_size = 100000, enable_unaligned_array_join = true;
-SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(1000000))) ARRAY JOIN x as n group by n;
-SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(10000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT n, count(1) from (SELECT groupArray(number % 10) AS x FROM (SELECT * FROM numbers(100000))) ARRAY JOIN x as n group by n;
+SELECT n % 10, count(1) from (SELECT range(0, number) as x FROM numbers(1000)) LEFT ARRAY JOIN x as n group by n % 10;
+SELECT (m+n) % 10, count(1) from (SELECT range(0, number+1) as x, range(0, number+2) as y FROM numbers(100)) ARRAY JOIN x as m, y as n group by (m+n) % 10;
 -- { echoOff }

From 66265e879dddee38080a9429a56ea0f4be3df291 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <benjamin.antal@clickhouse.com>
Date: Mon, 18 Sep 2023 09:59:20 +0000
Subject: [PATCH 082/243] Make the transformation to one line reversible

---
 src/Parsers/tests/gtest_Parser.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index a20d6b2f111..a84f63d7cb1 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -18,6 +18,7 @@
 #include <string_view>
 #include <regex>
 #include <gtest/gtest.h>
+#include <boost/algorithm/string/replace.hpp>
 
 namespace
 {
@@ -40,7 +41,7 @@ std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr<IParser> pa
 std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case)
 {
     std::string input_text{test_case.input_text};
-    std::replace(input_text.begin(), input_text.end(),'\n', ' ');
+    boost::replace_all(input_text, "\n", "\\n");
     return ostr << "ParserTestCase input: " << input_text;
 }
 

From 2ea324e44e3a8580ad9ad5f16be53812e53017a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <benjamin.antal@clickhouse.com>
Date: Mon, 18 Sep 2023 09:59:30 +0000
Subject: [PATCH 083/243] Add explanatory comment

---
 src/Parsers/tests/gtest_Parser.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp
index a84f63d7cb1..3a90256f262 100644
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@@ -40,6 +40,8 @@ std::ostream & operator<<(std::ostream & ostr, const std::shared_ptr<IParser> pa
 
 std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case)
 {
+    // New line characters are removed because at the time of writing this the unit test results are parsed from the
+    // command line output, and multi-line string representations are breaking the parsing logic.
     std::string input_text{test_case.input_text};
     boost::replace_all(input_text, "\n", "\\n");
     return ostr << "ParserTestCase input: " << input_text;

From 7f3279fa21f7beb98ccc39798a772fee0f28b58b Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Mon, 18 Sep 2023 11:18:39 +0000
Subject: [PATCH 084/243] Revert batch size to 100 for Keeper

---
 src/Coordination/CoordinationSettings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h
index bb9e57715db..0398e12d07d 100644
--- a/src/Coordination/CoordinationSettings.h
+++ b/src/Coordination/CoordinationSettings.h
@@ -38,7 +38,7 @@ struct Settings;
     M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
     M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
     M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \
-    M(UInt64, max_requests_batch_size, 1000, "Max size of batch of requests that can be sent to RAFT", 0) \
+    M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
     M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
     M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \
     M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \

From 262a81a7d7272523af7b601e24665deb3982eff8 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 18 Sep 2023 13:20:19 +0000
Subject: [PATCH 085/243] Fix style check for test

---
 .../02875_parallel_replicas_cluster_all_replicas.sql            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
index c1cbc404aa6..1201a156246 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
@@ -9,7 +9,7 @@ SYSTEM FLUSH LOGS;
 
 SET allow_experimental_parallel_reading_from_replicas=0;
 SELECT count() > 0 FROM system.text_log
-WHERE query_id in (select query_id from system.query_log where log_comment = '02875_190aed82-2423-413b-ad4c-24dcca50f65b')
+WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_190aed82-2423-413b-ad4c-24dcca50f65b')
     AND message LIKE '%Parallel reading from replicas is disabled for cluster%';
 
 DROP TABLE tt;

From c285f85cade3c114785e5d794211a344c64a810f Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 18 Sep 2023 14:43:10 +0000
Subject: [PATCH 086/243] Fix: correct check if all shards have only 1 node

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 372376afc02..637d0de8a1f 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -127,12 +127,12 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster,
     /// disable parallel replicas if cluster contains only shards with 1 replica
     if (context->canUseParallelReplicas())
     {
-        bool disable_parallel_replicas = false;
+        bool disable_parallel_replicas = true;
         for (const auto & shard : cluster.getShardsInfo())
         {
-            if (shard.getAllNodeCount() <= 1)
+            if (shard.getAllNodeCount() > 1)
             {
-                disable_parallel_replicas = true;
+                disable_parallel_replicas = false;
                 break;
             }
         }
@@ -192,10 +192,10 @@ void executeQuery(
     SelectStreamFactory::Shards remote_shards;
 
     auto cluster = query_info.getCluster();
-    auto new_context
-        = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log);
-    if (new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
-        != context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value)
+    auto new_context = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log);
+    if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
+        && context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
+           != new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value)
     {
         LOG_TRACE(
             log,

From 13bc2941746dedc21207ed7ad4de87f63ea27cac Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 18 Sep 2023 15:09:42 +0000
Subject: [PATCH 087/243] Test for remote()

---
 .../02875_parallel_replicas_remote.reference      |  2 ++
 .../02875_parallel_replicas_remote.sql            | 15 +++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tests/queries/0_stateless/02875_parallel_replicas_remote.reference
 create mode 100644 tests/queries/0_stateless/02875_parallel_replicas_remote.sql

diff --git a/tests/queries/0_stateless/02875_parallel_replicas_remote.reference b/tests/queries/0_stateless/02875_parallel_replicas_remote.reference
new file mode 100644
index 00000000000..79ebceba739
--- /dev/null
+++ b/tests/queries/0_stateless/02875_parallel_replicas_remote.reference
@@ -0,0 +1,2 @@
+60
+1
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
new file mode 100644
index 00000000000..f47fc559df9
--- /dev/null
+++ b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
@@ -0,0 +1,15 @@
+DROP TABLE IF EXISTS tt;
+CREATE TABLE tt (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
+INSERT INTO tt SELECT * FROM numbers(10);
+
+SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, parallel_replicas_for_non_replicated_merge_tree=1;
+SELECT count() FROM remote('127.0.0.{1..6}', currentDatabase(), tt) settings log_comment='02875_89f3c39b-1919-48cb-b66e-ef9904e73146';
+
+SYSTEM FLUSH LOGS;
+
+SET allow_experimental_parallel_reading_from_replicas=0;
+SELECT count() > 0 FROM system.text_log
+WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_89f3c39b-1919-48cb-b66e-ef9904e73146')
+    AND message LIKE '%Parallel reading from replicas is disabled for cluster%';
+
+DROP TABLE tt;

From e1019ba3c47f175c6c582ae169517ebe4e297e28 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Mon, 18 Sep 2023 15:27:55 +0000
Subject: [PATCH 088/243] Disabling parallel replicas per shard will be done
 separately

---
 src/Processors/QueryPlan/ReadFromRemote.cpp | 51 +++++++--------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 884bc85aaaf..f8d718b481d 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -236,49 +236,29 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
     scalars["_shard_num"]
         = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_info.shard_num), std::make_shared<DataTypeUInt32>(), "_shard_num"}};
 
-    ContextPtr execution_context = context;
     if (context->canUseParallelReplicas())
     {
-        if (shard.shard_info.getAllNodeCount() > 1)
+        if (context->getSettingsRef().cluster_for_parallel_replicas.changed)
         {
-            if (context->getSettingsRef().cluster_for_parallel_replicas.changed)
-            {
-                const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas;
-                if (cluster_for_parallel_replicas != cluster_name)
-                    LOG_INFO(
-                        log,
-                        "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is "
-                        "used: {}",
-                        cluster_for_parallel_replicas,
-                        cluster_name);
-            }
-
-            LOG_TRACE(&Poco::Logger::get("ReadFromRemote"), "Setting `cluster_for_parallel_replicas` to {}", cluster_name);
-            context->setSetting("cluster_for_parallel_replicas", cluster_name);
+            const String cluster_for_parallel_replicas = context->getSettingsRef().cluster_for_parallel_replicas;
+            if (cluster_for_parallel_replicas != cluster_name)
+                LOG_INFO(
+                    log,
+                    "cluster_for_parallel_replicas has been set for the query but has no effect: {}. Distributed table cluster is "
+                    "used: {}",
+                    cluster_for_parallel_replicas,
+                    cluster_name);
         }
-        else
-        {
-            ContextMutablePtr tmp = Context::createCopy(context);
-            tmp->setSetting("allow_experimental_parallel_reading_from_replicas", Field{0});
-            execution_context = tmp;
 
-            LOG_TRACE(
-                &Poco::Logger::get("ReadFromRemote"),
-                "Parallel reading from replicas is disabled for shard. Not enough nodes: shard={} cluster={} nodes={}",
-                shard.shard_info.shard_num,
-                cluster_name,
-                shard.shard_info.getAllNodeCount());
-        }
+        LOG_TRACE(log, "Setting `cluster_for_parallel_replicas` to {}", cluster_name);
+        context->setSetting("cluster_for_parallel_replicas", cluster_name);
     }
 
-    std::shared_ptr<RemoteQueryExecutor> remote_query_executor;
-
-    remote_query_executor = std::make_shared<RemoteQueryExecutor>(
-        shard.shard_info.pool, query_string, output_stream->header, execution_context, throttler, scalars, external_tables, stage);
-
+    auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
+        shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage);
     remote_query_executor->setLogger(log);
 
-    if (execution_context->canUseParallelReplicas())
+    if (context->canUseParallelReplicas())
     {
         // when doing parallel reading from replicas (ParallelReplicasMode::READ_TASKS) on a shard:
         // establish a connection to a replica on the shard, the replica will instantiate coordinator to manage parallel reading from replicas on the shard.
@@ -294,7 +274,8 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
     if (!table_func_ptr)
         remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table);
 
-    pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
+    pipes.emplace_back(
+        createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
     addConvertingActions(pipes.back(), output_stream->header);
 }
 

From 326eaa9bc583bd0397c97760580679b5b50d46c6 Mon Sep 17 00:00:00 2001
From: Alexander Sapin <alesapin@gmail.com>
Date: Mon, 18 Sep 2023 18:26:53 +0200
Subject: [PATCH 089/243] Better exception message in checkDataPart

---
 src/Storages/MergeTree/checkDataPart.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index a327ca17609..1717d91271a 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -123,9 +123,20 @@ static IMergeTreeDataPart::Checksums checkDataPart(
 
     if (data_part_storage.exists(IMergeTreeDataPart::SERIALIZATION_FILE_NAME))
     {
-        auto serialization_file = data_part_storage.readFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, read_settings, std::nullopt, std::nullopt);
-        SerializationInfo::Settings settings{ratio_of_defaults, false};
-        serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file);
+        try
+        {
+            auto serialization_file = data_part_storage.readFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, read_settings, std::nullopt, std::nullopt);
+            SerializationInfo::Settings settings{ratio_of_defaults, false};
+            serialization_infos = SerializationInfoByName::readJSON(columns_txt, settings, *serialization_file);
+        }
+        catch (const Poco::Exception & ex)
+        {
+            throw Exception(ErrorCodes::CORRUPTED_DATA, "Failed to load {}, with error {}", IMergeTreeDataPart::SERIALIZATION_FILE_NAME, ex.message());
+        }
+        catch (...)
+        {
+            throw;
+        }
     }
 
     auto get_serialization = [&serialization_infos](const auto & column)

From e94908ce2e41fa532ab5de211f498d9b6247c4be Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Mon, 18 Sep 2023 17:14:06 +0000
Subject: [PATCH 090/243] add unit tests + fix bugs

---
 src/IO/ISchedulerNode.h                       |  53 ++++++-
 src/IO/Resource/SemaphoreConstraint.h         |   1 -
 src/IO/Resource/ThrottlerConstraint.h         |  16 +-
 src/IO/Resource/tests/ResourceTest.h          |  26 +++-
 .../tests/gtest_throttler_constraint.cpp      | 145 ++++++++++++++++++
 5 files changed, 224 insertions(+), 17 deletions(-)
 create mode 100644 src/IO/Resource/tests/gtest_throttler_constraint.cpp

diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h
index 57d7b1c70e8..628dfd53e41 100644
--- a/src/IO/ISchedulerNode.h
+++ b/src/IO/ISchedulerNode.h
@@ -3,6 +3,7 @@
 #include <Common/ErrorCodes.h>
 #include <Common/Exception.h>
 #include <Common/Priority.h>
+#include <base/defines.h>
 #include <base/types.h>
 
 #include <IO/ResourceRequest.h>
@@ -90,6 +91,7 @@ class EventQueue
 public:
     using Event = std::function<void()>;
     using TimePoint = std::chrono::system_clock::time_point;
+    using Duration = std::chrono::system_clock::duration;
     static constexpr UInt64 not_postponed = 0;
 
     struct Postponed
@@ -188,7 +190,7 @@ public:
             return false;
         else
         {
-            if (postponed.front().key <= std::chrono::system_clock::now())
+            if (postponed.front().key <= now())
             {
                 processPostponed(lock);
                 return true;
@@ -206,17 +208,54 @@ public:
             if (!queue.empty())
                 return processQueue(lock);
             if (postponed.empty())
-                pending.wait(lock);
+                wait(lock);
             else
             {
-                if (postponed.front().key <= std::chrono::system_clock::now())
+                if (postponed.front().key <= now())
                     return processPostponed(lock);
-                pending.wait_until(lock, postponed.front().key);
+                waitUntil(lock, postponed.front().key);
             }
         }
     }
 
+    TimePoint now()
+    {
+        if (auto result = manual_time.load(); likely(result == TimePoint()))
+            return std::chrono::system_clock::now();
+        else
+            return result;
+    }
+
+    /// For testing only
+    void setManualTime(TimePoint value)
+    {
+        std::unique_lock lock{mutex};
+        manual_time.store(value);
+        pending.notify_one();
+    }
+
+    /// For testing only
+    void advanceManualTime(Duration elapsed)
+    {
+        std::unique_lock lock{mutex};
+        manual_time.store(manual_time.load() + elapsed);
+        pending.notify_one();
+    }
+
 private:
+    void wait(std::unique_lock<std::mutex> & lock)
+    {
+        pending.wait(lock);
+    }
+
+    void waitUntil(std::unique_lock<std::mutex> & lock, TimePoint t)
+    {
+        if (likely(manual_time.load() == TimePoint()))
+            pending.wait_until(lock, t);
+        else
+            pending.wait(lock);
+    }
+
     void processQueue(std::unique_lock<std::mutex> & lock)
     {
         Event event = std::move(queue.front());
@@ -239,6 +278,8 @@ private:
     std::deque<Event> queue;
     std::vector<Postponed> postponed;
     UInt64 last_id = 0;
+
+    std::atomic<TimePoint> manual_time{TimePoint()}; // for tests only
 };
 
 /*
@@ -272,12 +313,12 @@ private:
 class ISchedulerNode : private boost::noncopyable
 {
 public:
-    ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
+    explicit ISchedulerNode(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
         : event_queue(event_queue_)
         , info(config, config_prefix)
     {}
 
-    virtual ~ISchedulerNode() {}
+    virtual ~ISchedulerNode() = default;
 
     /// Checks if two nodes configuration is equal
     virtual bool equals(ISchedulerNode * other)
diff --git a/src/IO/Resource/SemaphoreConstraint.h b/src/IO/Resource/SemaphoreConstraint.h
index c4ab4ad59ae..f0053ded0af 100644
--- a/src/IO/Resource/SemaphoreConstraint.h
+++ b/src/IO/Resource/SemaphoreConstraint.h
@@ -152,7 +152,6 @@ private:
         return satisfied() && child_active;
     }
 
-private:
     const Int64 max_requests = default_max_requests;
     const Int64 max_cost = default_max_cost;
 
diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
index 48ae60228b0..f456cbb7059 100644
--- a/src/IO/Resource/ThrottlerConstraint.h
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -1,15 +1,13 @@
 #pragma once
 
 #include <IO/ISchedulerConstraint.h>
-#include <__chrono/duration.h>
-#include <__chrono/time_point.h>
-#include "IO/ISchedulerNode.h"
 
 #include <chrono>
 #include <mutex>
 #include <limits>
 #include <utility>
 
+
 namespace DB
 {
 
@@ -24,6 +22,8 @@ public:
         : ISchedulerConstraint(event_queue_, config, config_prefix)
         , max_burst(config.getDouble(config_prefix + ".max_burst", 0))
         , max_speed(config.getDouble(config_prefix + ".max_speed", 0))
+        , last_update(event_queue_->now())
+        , tokens(max_burst)
     {}
 
     ~ThrottlerConstraint() override
@@ -144,17 +144,18 @@ private:
         postponed = EventQueue::not_postponed;
         bool was_active = active();
         updateBucket();
-        if (!was_active && active())
+        if (!was_active && active() && parent)
             parent->activateChild(this);
     }
 
     void updateBucket(ResourceCost use = 0)
     {
-        auto now = std::chrono::system_clock::now();
+        auto now = event_queue->now();
         if (max_speed > 0.0)
         {
             double elapsed = std::chrono::nanoseconds(now - last_update).count() / 1e9;
-            tokens = std::min(tokens + max_speed * elapsed - use, max_burst);
+            tokens = std::min(tokens + max_speed * elapsed, max_burst);
+            tokens -= use; // This is done outside min() to avoid passing large requests w/o token consumption after long idle period
 
             // Postpone activation until there is positive amount of tokens
             if (tokens < 0.0)
@@ -162,7 +163,7 @@ private:
                 auto delay_ns = std::chrono::nanoseconds(static_cast<Int64>(-tokens / max_speed * 1e9));
                 if (postponed == EventQueue::not_postponed)
                 {
-                    postponed = event_queue->postpone(std::chrono::time_point_cast<std::chrono::system_clock::duration>(now + delay_ns),
+                    postponed = event_queue->postpone(std::chrono::time_point_cast<EventQueue::Duration>(now + delay_ns),
                         [this] { onPostponed(); });
                     throttling_duration += delay_ns;
                 }
@@ -181,7 +182,6 @@ private:
         return satisfied() && child_active;
     }
 
-private:
     const double max_burst{0}; /// in tokens
     const double max_speed{0}; /// in tokens per second
 
diff --git a/src/IO/Resource/tests/ResourceTest.h b/src/IO/Resource/tests/ResourceTest.h
index 36009b3afbe..072e2dda47c 100644
--- a/src/IO/Resource/tests/ResourceTest.h
+++ b/src/IO/Resource/tests/ResourceTest.h
@@ -173,6 +173,28 @@ public:
         }
     }
 
+    void process(EventQueue::TimePoint now, size_t count_limit = size_t(-1), ResourceCost cost_limit = ResourceCostMax)
+    {
+        event_queue.setManualTime(now);
+
+        while (count_limit > 0 && cost_limit > 0)
+        {
+            processEvents();
+            if (!root_node->isActive())
+                return;
+            if (auto [request, _] = root_node->dequeueRequest(); request)
+            {
+                count_limit--;
+                cost_limit -= request->cost;
+                handle(static_cast<Request *>(request));
+            }
+            else
+            {
+                break;
+            }
+        }
+    }
+
     void handle(Request * request)
     {
         consumed_cost[request->name] += request->cost;
@@ -181,8 +203,8 @@ public:
 
     void consumed(const String & name, ResourceCost value, ResourceCost error = 0)
     {
-        EXPECT_TRUE(consumed_cost[name] >= value - error);
-        EXPECT_TRUE(consumed_cost[name] <= value + error);
+        EXPECT_GE(consumed_cost[name], value - error);
+        EXPECT_LE(consumed_cost[name], value + error);
         consumed_cost[name] -= value;
     }
 
diff --git a/src/IO/Resource/tests/gtest_throttler_constraint.cpp b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
new file mode 100644
index 00000000000..4f12caec6c2
--- /dev/null
+++ b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
@@ -0,0 +1,145 @@
+#include <chrono>
+#include <gtest/gtest.h>
+
+#include <IO/Resource/tests/ResourceTest.h>
+
+#include <IO/Resource/FairPolicy.h>
+#include <IO/Resource/ThrottlerConstraint.h>
+#include "IO/ISchedulerNode.h"
+#include "IO/ResourceRequest.h"
+
+using namespace DB;
+
+using ResourceTest = ResourceTestClass;
+
+TEST(IOResourceThrottlerConstraint, LeakyBucketConstraint)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
+    t.process(start, 0);
+
+    t.add<ThrottlerConstraint>("/", "<max_burst>20.0</max_burst><max_speed>10.0</max_speed>");
+    t.add<FifoQueue>("/A", "");
+
+    t.enqueue("/A", {10, 10, 10, 10, 10, 10, 10, 10});
+
+    t.process(start + std::chrono::seconds(0));
+    t.consumed("A", 30); // It is allowed to go below zero for exactly one resource request
+
+    t.process(start + std::chrono::seconds(1));
+    t.consumed("A", 10);
+
+    t.process(start + std::chrono::seconds(2));
+    t.consumed("A", 10);
+
+    t.process(start + std::chrono::seconds(3));
+    t.consumed("A", 10);
+
+    t.process(start + std::chrono::seconds(4));
+    t.consumed("A", 10);
+
+    t.process(start + std::chrono::seconds(100500));
+    t.consumed("A", 10);
+}
+
+TEST(IOResourceThrottlerConstraint, BucketFilling)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
+    t.process(start, 0);
+
+    t.add<ThrottlerConstraint>("/", "<max_burst>100.0</max_burst><max_speed>10.0</max_speed>");
+    t.add<FifoQueue>("/A", "");
+
+    t.enqueue("/A", {100});
+
+    t.process(start + std::chrono::seconds(0));
+    t.consumed("A", 100); // consume all tokens, but it is still active (not negative)
+
+    t.process(start + std::chrono::seconds(5));
+    t.consumed("A", 0); // There was nothing to consume
+
+    t.enqueue("/A", {10, 10, 10, 10, 10, 10, 10, 10, 10, 10});
+    t.process(start + std::chrono::seconds(5));
+    t.consumed("A", 60); // 5 sec * 10 tokens/sec = 50 tokens + 1 extra request to go below zero
+
+    t.process(start + std::chrono::seconds(100));
+    t.consumed("A", 40); // Consume rest
+
+    t.process(start + std::chrono::seconds(200));
+
+    t.enqueue("/A", {95, 1, 1, 1, 1, 1, 1, 1, 1, 1});
+    t.process(start + std::chrono::seconds(200));
+    t.consumed("A", 101); // check we cannot consume more than max_burst + 1 request
+
+    t.process(start + std::chrono::seconds(100500));
+    t.consumed("A", 3);
+}
+
+TEST(IOResourceThrottlerConstraint, PeekAndAvgLimits)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
+    t.process(start, 0);
+
+    // Burst = 100 token
+    // Peek speed = 50 token/s for 10 seconds
+    // Avg speed = 10 tokens/s afterwards
+    t.add<ThrottlerConstraint>("/", "<max_burst>100.0</max_burst><max_speed>50.0</max_speed>");
+    t.add<ThrottlerConstraint>("/avg", "<max_burst>5000.0</max_burst><max_speed>10.0</max_speed>");
+    t.add<FifoQueue>("/avg/A", "");
+
+    ResourceCost req_cost = 1;
+    ResourceCost total_cost = 10000;
+    for (int i = 0; i < total_cost / req_cost; i++)
+        t.enqueue("/avg/A", {req_cost});
+
+    double consumed = 0;
+    for (int seconds = 0; seconds < 100; seconds++)
+    {
+        t.process(start + std::chrono::seconds(seconds));
+        double arrival_curve = std::min(100.0 + 50.0 * seconds, 5000.0 + 10.0 * seconds) + req_cost;
+        t.consumed("A", static_cast<ResourceCost>(arrival_curve - consumed));
+        consumed = arrival_curve;
+    }
+}
+
+TEST(IOResourceThrottlerConstraint, ThrottlerAndFairness)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start;
+    start += EventQueue::Duration(1000000000);
+    t.process(start, 0);
+
+    t.add<ThrottlerConstraint>("/", "<max_burst>100.0</max_burst><max_speed>10.0</max_speed>");
+    t.add<FairPolicy>("/fair", "");
+    t.add<FifoQueue>("/fair/A", "<weight>10</weight>");
+    t.add<FifoQueue>("/fair/B", "<weight>90</weight>");
+
+    ResourceCost req_cost = 1;
+    ResourceCost total_cost = 2000;
+    for (int i = 0; i < total_cost / req_cost; i++)
+    {
+        t.enqueue("/fair/A", {req_cost});
+        t.enqueue("/fair/B", {req_cost});
+    }
+
+    double shareA = 0.1;
+    double shareB = 0.9;
+
+    // Bandwidth-latency coupling due to fairness: worst latency is inversely proportional to share
+    auto max_latencyA = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareA));
+    auto max_latencyB = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareB));
+
+    double consumedA = 0;
+    double consumedB = 0;
+    for (int seconds = 0; seconds < 100; seconds++)
+    {
+        t.process(start + std::chrono::seconds(seconds));
+        double arrival_curve = 100.0 + 10.0 * seconds + req_cost;
+        t.consumed("A", static_cast<ResourceCost>(arrival_curve * shareA - consumedA), max_latencyA);
+        t.consumed("B", static_cast<ResourceCost>(arrival_curve * shareB - consumedB), max_latencyB);
+        consumedA = arrival_curve * shareA;
+        consumedB = arrival_curve * shareB;
+    }
+}

From c3f8ee589e6ca9790f642fbaafabf34c2854e6ee Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Mon, 18 Sep 2023 17:24:58 +0000
Subject: [PATCH 091/243] fix uninitialized value

---
 src/IO/Resource/ThrottlerConstraint.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
index f456cbb7059..763cd25cf9c 100644
--- a/src/IO/Resource/ThrottlerConstraint.h
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -190,7 +190,7 @@ private:
     double tokens; /// in ResourceCost units
     bool child_active = false;
 
-    std::chrono::nanoseconds throttling_duration;
+    std::chrono::nanoseconds throttling_duration{0};
 
     SchedulerNodePtr child;
 };

From 178780315741c9befb779f2f65cf09d30141a8ee Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Mon, 18 Sep 2023 17:25:16 +0000
Subject: [PATCH 092/243] fix scheduler node registration

---
 src/IO/Resource/ThrottlerConstraint.cpp    | 2 +-
 src/IO/Resource/registerSchedulerNodes.cpp | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/IO/Resource/ThrottlerConstraint.cpp b/src/IO/Resource/ThrottlerConstraint.cpp
index ca52d5f3e83..0ced2b1ab3e 100644
--- a/src/IO/Resource/ThrottlerConstraint.cpp
+++ b/src/IO/Resource/ThrottlerConstraint.cpp
@@ -5,7 +5,7 @@
 namespace DB
 {
 
-void registerSemaphoreConstraint(SchedulerNodeFactory & factory)
+void registerThrottlerConstraint(SchedulerNodeFactory & factory)
 {
     factory.registerMethod<ThrottlerConstraint>("bandwidth_limit");
 }
diff --git a/src/IO/Resource/registerSchedulerNodes.cpp b/src/IO/Resource/registerSchedulerNodes.cpp
index 896f96d7f50..612885e26b7 100644
--- a/src/IO/Resource/registerSchedulerNodes.cpp
+++ b/src/IO/Resource/registerSchedulerNodes.cpp
@@ -10,6 +10,7 @@ namespace DB
 void registerPriorityPolicy(SchedulerNodeFactory &);
 void registerFairPolicy(SchedulerNodeFactory &);
 void registerSemaphoreConstraint(SchedulerNodeFactory &);
+void registerThrottlerConstraint(SchedulerNodeFactory &);
 void registerFifoQueue(SchedulerNodeFactory &);
 
 void registerSchedulerNodes()
@@ -22,6 +23,7 @@ void registerSchedulerNodes()
 
     // ISchedulerConstraint
     registerSemaphoreConstraint(factory);
+    registerThrottlerConstraint(factory);
 
     // ISchedulerQueue
     registerFifoQueue(factory);

From 3a3a00f95e213d57773ae4769e587b3e84850610 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 24 Aug 2023 18:03:22 +0000
Subject: [PATCH 093/243] Bump minimal required compiler to Clang 16

---
 .github/workflows/nightly.yml                 |  4 +--
 cmake/clang_tidy.cmake                        |  4 +--
 cmake/ld.lld.in                               | 17 ---------
 cmake/sanitize.cmake                          |  9 -----
 cmake/tools.cmake                             | 13 ++-----
 contrib/llvm-project-cmake/CMakeLists.txt     |  2 +-
 docker/packager/packager                      | 22 ++++++------
 docker/test/fasttest/run.sh                   |  2 +-
 docker/test/fuzzer/run-fuzzer.sh              |  2 +-
 docker/test/keeper-jepsen/run.sh              |  2 +-
 docker/test/server-jepsen/run.sh              |  2 +-
 docker/test/sqltest/run.sh                    |  2 +-
 docker/test/util/Dockerfile                   |  2 +-
 docs/en/development/build-cross-osx.md        |  8 ++---
 docs/en/development/build.md                  | 10 +++---
 docs/en/development/continuous-integration.md |  2 +-
 docs/en/development/developer-instruction.md  |  2 +-
 tests/ci/ci_config.py                         | 36 +++++++++----------
 .../0_stateless/02252_jit_profile_events.sql  |  2 +-
 .../02723_jit_aggregation_bug_48120.sql       |  2 +-
 20 files changed, 55 insertions(+), 90 deletions(-)
 delete mode 100755 cmake/ld.lld.in

diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 8162dc37223..ed8159b229e 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -82,8 +82,8 @@ jobs:
       SONAR_SCANNER_VERSION: 4.8.0.2856
       SONAR_SERVER_URL: "https://sonarcloud.io"
       BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
-      CC: clang-16
-      CXX: clang++-16
+      CC: clang-17
+      CXX: clang++-17
     steps:
       - name: Check out repository code
         uses: ClickHouse/checkout@v1
diff --git a/cmake/clang_tidy.cmake b/cmake/clang_tidy.cmake
index 96c295b6bb9..4323c20463a 100644
--- a/cmake/clang_tidy.cmake
+++ b/cmake/clang_tidy.cmake
@@ -5,14 +5,14 @@ if (ENABLE_CLANG_TIDY)
 
     find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache")
     if (CLANG_TIDY_CACHE_PATH)
-        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy")
+        find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy")
 
         # Why do we use ';' here?
         # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY
         # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax.
         set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper")
     else ()
-        find_program (CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy")
+        find_program (CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy")
     endif ()
 
     if (CLANG_TIDY_PATH)
diff --git a/cmake/ld.lld.in b/cmake/ld.lld.in
deleted file mode 100755
index 78a264a0089..00000000000
--- a/cmake/ld.lld.in
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env bash
-
-# This is a workaround for bug in llvm/clang,
-# that does not produce .debug_aranges with LTO
-#
-# NOTE: this is a temporary solution, that should be removed after upgrading to
-# clang-16/llvm-16.
-#
-# Refs: https://reviews.llvm.org/D133092
-
-# NOTE: only -flto=thin is supported.
-# NOTE: it is not possible to check was there -gdwarf-aranges initially or not.
-if [[ "$*" =~ -plugin-opt=thinlto ]]; then
-    exec "@LLD_PATH@" -plugin-opt=-generate-arange-section "$@"
-else
-    exec "@LLD_PATH@" "$@"
-fi
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 6ac46bb42fa..b3c8f97a5e1 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -14,15 +14,6 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
 if (SANITIZE)
     if (SANITIZE STREQUAL "address")
         set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
-        if (COMPILER_CLANG)
-            if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
-                # LLVM-15 has a bug in Address Sanitizer, preventing the usage
-                # of 'sanitize-address-use-after-scope', see [1].
-                #
-                #   [1]: https://github.com/llvm/llvm-project/issues/58633
-                set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
-            endif()
-        endif()
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
 
diff --git a/cmake/tools.cmake b/cmake/tools.cmake
index 802907c9dda..7fb45bc15d8 100644
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@@ -13,7 +13,7 @@ execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER
 message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}")
 
 # Require minimum compiler versions
-set (CLANG_MINIMUM_VERSION 15)
+set (CLANG_MINIMUM_VERSION 16)
 set (XCODE_MINIMUM_VERSION 12.0)
 set (APPLE_CLANG_MINIMUM_VERSION 12.0.0)
 
@@ -70,16 +70,7 @@ if (LINKER_NAME)
     if (NOT LLD_PATH)
         message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
     endif ()
-    # This a temporary quirk to emit .debug_aranges with ThinLTO, it is only the case clang/llvm <16
-    if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
-        set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
-        configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
-
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
-    else ()
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
-    endif()
-
+    set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
 endif ()
 
 if (LINKER_NAME)
diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt
index 00992f4f792..ce82a10d3eb 100644
--- a/contrib/llvm-project-cmake/CMakeLists.txt
+++ b/contrib/llvm-project-cmake/CMakeLists.txt
@@ -1,4 +1,4 @@
-if (APPLE OR SANITIZE STREQUAL "undefined")
+if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory")
    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
 else()
    set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
diff --git a/docker/packager/packager b/docker/packager/packager
index 8f203230702..a0e460abacf 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -393,18 +393,18 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--compiler",
         choices=(
-            "clang-16",
-            "clang-16-darwin",
-            "clang-16-darwin-aarch64",
-            "clang-16-aarch64",
-            "clang-16-aarch64-v80compat",
-            "clang-16-ppc64le",
-            "clang-16-riscv64",
-            "clang-16-s390x",
-            "clang-16-amd64-compat",
-            "clang-16-freebsd",
+            "clang-17",
+            "clang-17-darwin",
+            "clang-17-darwin-aarch64",
+            "clang-17-aarch64",
+            "clang-17-aarch64-v80compat",
+            "clang-17-ppc64le",
+            "clang-17-riscv64",
+            "clang-17-s390x",
+            "clang-17-amd64-compat",
+            "clang-17-freebsd",
         ),
-        default="clang-16",
+        default="clang-17",
         help="a compiler to use",
     )
     parser.add_argument(
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 296a132d3e3..201ef01cf03 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
 stage=${stage:-}
 
 # Compiler version, normally set by Dockerfile
-export LLVM_VERSION=${LLVM_VERSION:-16}
+export LLVM_VERSION=${LLVM_VERSION:-17}
 
 # A variable to pass additional flags to CMake.
 # Here we explicitly default it to nothing so that bash doesn't complain about
diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 5426cd9756b..e56fe6ca83a 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -17,7 +17,7 @@ stage=${stage:-}
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 echo "$script_dir"
 repo_dir=ch
-BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
+BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"}
 BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
 
 function git_clone_with_retry
diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh
index 694d7fcd916..576a0f0ef8e 100644
--- a/docker/test/keeper-jepsen/run.sh
+++ b/docker/test/keeper-jepsen/run.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 
-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
 
 
diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh
index 0c3768df813..81e442e65b6 100644
--- a/docker/test/server-jepsen/run.sh
+++ b/docker/test/server-jepsen/run.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 
-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
 
 
diff --git a/docker/test/sqltest/run.sh b/docker/test/sqltest/run.sh
index cba1c1dab1f..1d939805c7b 100755
--- a/docker/test/sqltest/run.sh
+++ b/docker/test/sqltest/run.sh
@@ -6,7 +6,7 @@ set -e
 set -u
 set -o pipefail
 
-BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
+BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"}
 BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
 
 function wget_with_retry
diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile
index 359041eed03..eb5abce280a 100644
--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
 RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
 
 # 15.0.2
-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=17
 
 RUN apt-get update \
     && apt-get install \
diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md
index ce8d1b77526..b70fc36e28e 100644
--- a/docs/en/development/build-cross-osx.md
+++ b/docs/en/development/build-cross-osx.md
@@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
 
 The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
 
-## Install Clang-16
+## Install Clang-17
 
 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
 For example the commands for Bionic are like:
 
 ``` bash
-sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list
-sudo apt-get install clang-16
+sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-17 main" >> /etc/apt/sources.list
+sudo apt-get install clang-17
 ```
 
 ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
 cd ClickHouse
 mkdir build-darwin
 cd build-darwin
-CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
+CC=clang-17 CXX=clang++-17 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
 ninja
 ```
 
diff --git a/docs/en/development/build.md b/docs/en/development/build.md
index 04dbc26aac1..b474c445604 100644
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@@ -57,7 +57,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
 
 For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
 
-As of April 2023, clang-16 or higher will work.
+As of August 2023, clang-16 or higher will work.
 GCC as a compiler is not supported.
 To build with a specific Clang version:
 
@@ -67,8 +67,8 @@ to see what version you have installed before setting this environment variable.
 :::
 
 ``` bash
-export CC=clang-16
-export CXX=clang++-16
+export CC=clang-17
+export CXX=clang++-17
 ```
 
 ### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
@@ -105,8 +105,8 @@ The build requires the following components:
 
 - Git (used to checkout the sources, not needed for the build)
 - CMake 3.20 or newer
-- Compiler: clang-16 or newer
-- Linker: lld-16 or newer
+- Compiler: clang-17 or newer
+- Linker: lld-17 or newer
 - Ninja
 - Yasm
 - Gawk
diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md
index c76ab738004..eec5ccbb9dc 100644
--- a/docs/en/development/continuous-integration.md
+++ b/docs/en/development/continuous-integration.md
@@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
 
 ### Report Details
 
-- **Compiler**: `clang-16`, optionally with the name of a target platform
+- **Compiler**: `clang-17`, optionally with the name of a target platform
 - **Build type**: `Debug` or `RelWithDebInfo` (cmake).
 - **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
 - **Status**: `success` or `fail`
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 1f3ab1aae2c..d632829d6a6 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
     export CC=clang CXX=clang++
     cmake ..
 
-If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
+If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-17 CXX=clang++-17`. The clang version will be in the script output.
 
 The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.
 
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index b9ccc23cb2e..cdee24dcd1a 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -87,49 +87,49 @@ class CiConfig:
 CI_CONFIG = CiConfig(
     build_config={
         "package_release": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             package_type="deb",
             static_binary_name="amd64",
             additional_pkgs=True,
         ),
         "package_aarch64": BuildConfig(
-            compiler="clang-16-aarch64",
+            compiler="clang-17-aarch64",
             package_type="deb",
             static_binary_name="aarch64",
             additional_pkgs=True,
         ),
         "package_asan": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             sanitizer="address",
             package_type="deb",
         ),
         "package_ubsan": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             sanitizer="undefined",
             package_type="deb",
         ),
         "package_tsan": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             sanitizer="thread",
             package_type="deb",
         ),
         "package_msan": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             sanitizer="memory",
             package_type="deb",
         ),
         "package_debug": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             debug_build=True,
             package_type="deb",
             comment="Note: sparse checkout was used",
         ),
         "binary_release": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             package_type="binary",
         ),
         "binary_tidy": BuildConfig(
-            compiler="clang-16",
+            compiler="clang-17",
             debug_build=True,
             package_type="binary",
             static_binary_name="debug-amd64",
@@ -137,48 +137,48 @@ CI_CONFIG = CiConfig(
             comment="clang-tidy is used for static analysis",
         ),
         "binary_darwin": BuildConfig(
-            compiler="clang-16-darwin",
+            compiler="clang-17-darwin",
             package_type="binary",
             static_binary_name="macos",
         ),
         "binary_aarch64": BuildConfig(
-            compiler="clang-16-aarch64",
+            compiler="clang-17-aarch64",
             package_type="binary",
         ),
         "binary_aarch64_v80compat": BuildConfig(
-            compiler="clang-16-aarch64-v80compat",
+            compiler="clang-17-aarch64-v80compat",
             package_type="binary",
             static_binary_name="aarch64v80compat",
             comment="For ARMv8.1 and older",
         ),
         "binary_freebsd": BuildConfig(
-            compiler="clang-16-freebsd",
+            compiler="clang-17-freebsd",
             package_type="binary",
             static_binary_name="freebsd",
         ),
         "binary_darwin_aarch64": BuildConfig(
-            compiler="clang-16-darwin-aarch64",
+            compiler="clang-17-darwin-aarch64",
             package_type="binary",
             static_binary_name="macos-aarch64",
         ),
         "binary_ppc64le": BuildConfig(
-            compiler="clang-16-ppc64le",
+            compiler="clang-17-ppc64le",
             package_type="binary",
             static_binary_name="powerpc64le",
         ),
         "binary_amd64_compat": BuildConfig(
-            compiler="clang-16-amd64-compat",
+            compiler="clang-17-amd64-compat",
             package_type="binary",
             static_binary_name="amd64compat",
             comment="SSE2-only build",
         ),
         "binary_riscv64": BuildConfig(
-            compiler="clang-16-riscv64",
+            compiler="clang-17-riscv64",
             package_type="binary",
             static_binary_name="riscv64",
         ),
         "binary_s390x": BuildConfig(
-            compiler="clang-16-s390x",
+            compiler="clang-17-s390x",
             package_type="binary",
             static_binary_name="s390x",
         ),
diff --git a/tests/queries/0_stateless/02252_jit_profile_events.sql b/tests/queries/0_stateless/02252_jit_profile_events.sql
index ddb95d4fa37..eca3c06f9f1 100644
--- a/tests/queries/0_stateless/02252_jit_profile_events.sql
+++ b/tests/queries/0_stateless/02252_jit_profile_events.sql
@@ -1,4 +1,4 @@
--- Tags: no-fasttest, no-ubsan, no-cpu-aarch64
+-- Tags: no-fasttest, no-ubsan, no-asan, no-msan, no-cpu-aarch64
 
 SET compile_expressions = 1;
 SET min_count_to_compile_expression = 0;
diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql
index 04e0fc5e0ba..a9a6d3058b2 100644
--- a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql
+++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql
@@ -1,4 +1,4 @@
--- Tags: no-fasttest, no-ubsan, no-cpu-aarch64
+-- Tags: no-fasttest, no-ubsan, no-msan, no-cpu-aarch64
 
 drop table if exists dummy;
 CREATE TABLE dummy ( num1 Int32, num2 Enum8('foo' = 0, 'bar' = 1, 'tar' = 2) )

From d31a43a3798141d0e91307a46b1b5152451fed23 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Sun, 17 Sep 2023 13:36:56 +0000
Subject: [PATCH 094/243] Remove Woboq

---
 CMakeLists.txt                                | 11 -------
 README.md                                     |  1 -
 docker/images.json                            |  8 +----
 docker/test/codebrowser/Dockerfile            | 30 -------------------
 docker/test/codebrowser/build.sh              | 29 ------------------
 docs/en/development/developer-instruction.md  |  2 --
 docs/ru/development/developer-instruction.md  |  2 --
 docs/zh/development/developer-instruction.md  |  2 --
 .../aspell-ignore/en/aspell-dict.txt          |  1 -
 9 files changed, 1 insertion(+), 85 deletions(-)
 delete mode 100644 docker/test/codebrowser/Dockerfile
 delete mode 100755 docker/test/codebrowser/build.sh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 65ff9dc5384..421f5f7dfe6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -101,17 +101,6 @@ if (ENABLE_FUZZING)
     set (ENABLE_PROTOBUF 1)
 endif()
 
-option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF)
-
-if (ENABLE_WOBOQ_CODEBROWSER)
-    set (ENABLE_EMBEDDED_COMPILER 0)
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories")
-    # woboq codebrowser uses clang tooling, and they could add default system
-    # clang includes, and later clang will warn for those added by itself
-    # includes.
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories")
-endif()
-
 # Global libraries
 # See:
 # - default_libs.cmake
diff --git a/README.md b/README.md
index 4ff9b9caaa1..6d253342a15 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,6 @@ curl https://clickhouse.com/ | sh
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
 * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
 * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
-* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlighting and navigation.
 * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev.
 * [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements.
 * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
diff --git a/docker/images.json b/docker/images.json
index d895e2da2f0..a70eb9df2ef 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -1,9 +1,7 @@
 {
     "docker/packager/binary": {
         "name": "clickhouse/binary-builder",
-        "dependent": [
-            "docker/test/codebrowser"
-        ]
+        "dependent": []
     },
     "docker/test/compatibility/centos": {
         "name": "clickhouse/test-old-centos",
@@ -59,10 +57,6 @@
         "name": "clickhouse/upgrade-check",
         "dependent": []
     },
-    "docker/test/codebrowser": {
-        "name": "clickhouse/codebrowser",
-        "dependent": []
-    },
     "docker/test/integration/runner": {
         "only_amd64": true,
         "name": "clickhouse/integration-tests-runner",
diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile
deleted file mode 100644
index 8136fd1fbbc..00000000000
--- a/docker/test/codebrowser/Dockerfile
+++ /dev/null
@@ -1,30 +0,0 @@
-# rebuild in #33610
-# docker build --network=host -t clickhouse/codebrowser .
-# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser
-ARG FROM_TAG=latest
-FROM clickhouse/binary-builder:$FROM_TAG
-
-# ARG for quick switch to a given ubuntu mirror
-ARG apt_archive="http://archive.ubuntu.com"
-RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
-
-RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
-
-ARG TARGETARCH
-RUN arch=${TARGETARCH:-amd64} \
-    && case $arch in \
-        amd64) rarch=x86_64 ;; \
-        arm64) rarch=aarch64 ;; \
-        *) exit 1 ;; \
-    esac
-
-# repo versions doesn't work correctly with C++17
-# also we push reports to s3, so we add index.html to subfolder urls
-# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
-RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
-  && cd /woboq_codebrowser \
-  && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
-  && ninja
-
-COPY build.sh /
-CMD ["bash", "-c", "/build.sh 2>&1"]
diff --git a/docker/test/codebrowser/build.sh b/docker/test/codebrowser/build.sh
deleted file mode 100755
index d76d0c3a039..00000000000
--- a/docker/test/codebrowser/build.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/env bash
-
-set -x -e
-
-
-STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data}
-SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build}
-BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build}
-OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output}
-HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report}
-SHA=${SHA:-nosha}
-DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data}
-nproc=$(($(nproc) + 2)) # increase parallelism
-
-read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
-
-mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
-cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}"
-mkdir -p "$HTML_RESULT_DIRECTORY"
-echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
-/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
-  -o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \
-  -d "$DATA" \
-    |& ts '%Y-%m-%d %H:%M:%S' \
-    | tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \
-    | grep --line-buffered -v ':[0-9]* Error: '
-cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/"
-/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \
-  -d "$DATA" |& ts '%Y-%m-%d %H:%M:%S'
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index d632829d6a6..8ec049dd004 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -276,8 +276,6 @@ Most probably some of the builds will fail at first times. This is due to the fa
 
 ## Browse ClickHouse Source Code {#browse-clickhouse-source-code}
 
-You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily.
-
 You can use GitHub integrated code browser [here](https://github.dev/ClickHouse/ClickHouse).
 
 Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md
index 7294bc2ae87..c63622594e4 100644
--- a/docs/ru/development/developer-instruction.md
+++ b/docs/ru/development/developer-instruction.md
@@ -289,6 +289,4 @@ Pull request можно создать, даже если работа над з
 
 ## Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse}
 
-Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно.
-
 Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse).
diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md
index 557bf33ee0c..1c1d4a03d64 100644
--- a/docs/zh/development/developer-instruction.md
+++ b/docs/zh/development/developer-instruction.md
@@ -270,6 +270,4 @@ ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签，就
 
 ## 浏览ClickHouse源代码 {#browse-clickhouse-source-code}
 
-您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。
-
 此外，您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse)
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index bcb971951e5..2f14edda20d 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -958,7 +958,6 @@ WikiStat
 WindowView
 WithNames
 WithNamesAndTypes
-Woboq
 WordNet
 WriteBuffer
 WriteBuffers

From 54f902f0b7b5977b01868e156cb8a00e71908a2e Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Mon, 18 Sep 2023 19:50:05 +0200
Subject: [PATCH 095/243] Adjust information_schema for Tableau Online

---
 .../InformationSchema/key_column_usage.sql    | 29 ++++++++++++
 .../referential_constraints.sql               | 19 ++++++++
 .../System/InformationSchema/schemata.sql     | 45 +++++++++----------
 .../System/InformationSchema/tables.sql       | 41 ++++++++++-------
 .../System/attachInformationSchemaTables.cpp  | 14 +++++-
 5 files changed, 109 insertions(+), 39 deletions(-)
 create mode 100644 src/Storages/System/InformationSchema/key_column_usage.sql
 create mode 100644 src/Storages/System/InformationSchema/referential_constraints.sql

diff --git a/src/Storages/System/InformationSchema/key_column_usage.sql b/src/Storages/System/InformationSchema/key_column_usage.sql
new file mode 100644
index 00000000000..98b8e9de63e
--- /dev/null
+++ b/src/Storages/System/InformationSchema/key_column_usage.sql
@@ -0,0 +1,29 @@
+ATTACH VIEW key_column_usage
+    (
+     `referenced_table_schema` Nullable(String),
+     `referenced_table_name` Nullable(String),
+     `referenced_column_name` Nullable(String),
+     `table_schema` String,
+     `table_name` String,
+     `column_name` Nullable(String),
+     `ordinal_position` UInt32,
+     `constraint_name` Nullable(String),
+     `REFERENCED_TABLE_SCHEMA` Nullable(String) ALIAS referenced_table_schema,
+     `REFERENCED_TABLE_NAME` Nullable(String) ALIAS referenced_table_name,
+     `REFERENCED_COLUMN_NAME` Nullable(String) ALIAS referenced_column_name,
+     `TABLE_SCHEMA` String ALIAS table_schema,
+     `TABLE_NAME` String ALIAS table_name,
+     `COLUMN_NAME` Nullable(String) ALIAS column_name,
+     `ORDINAL_POSITION` UInt32 ALIAS ordinal_position,
+     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name
+) AS
+SELECT NULL      AS `referenced_table_schema`,
+       NULL      AS `referenced_table_name`,
+       NULL      AS `referenced_column_name`,
+       database  AS `table_schema`,
+       table     AS `table_name`,
+       name      AS `column_name`,
+       position  AS `ordinal_position`,
+       'PRIMARY' AS `constraint_name`
+FROM system.columns
+WHERE is_in_primary_key;
diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql
new file mode 100644
index 00000000000..73744bf59ac
--- /dev/null
+++ b/src/Storages/System/InformationSchema/referential_constraints.sql
@@ -0,0 +1,19 @@
+ATTACH VIEW referential_constraints
+    (
+     `constraint_name` Nullable(String),
+     `constraint_schema` String,
+     `table_name` String,
+     `update_rule` String,
+     `delete_rule` String,
+     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
+     `CONSTRAINT_SCHEMA` String ALIAS constraint_schema,
+     `TABLE_NAME` String ALIAS table_name,
+     `UPDATE_RULE` String ALIAS update_rule,
+     `DELETE_RULE` String ALIAS delete_rule
+) AS
+SELECT NULL AS `constraint_name`,
+       ''   AS `constraint_schema`,
+       ''   AS `table_name`,
+       ''   AS `update_rule`,
+       ''   AS `delete_rule`
+WHERE false; -- make sure this view is always empty
diff --git a/src/Storages/System/InformationSchema/schemata.sql b/src/Storages/System/InformationSchema/schemata.sql
index 9686fcbf4fa..449e28769db 100644
--- a/src/Storages/System/InformationSchema/schemata.sql
+++ b/src/Storages/System/InformationSchema/schemata.sql
@@ -1,26 +1,25 @@
 ATTACH VIEW schemata
-(
-    `catalog_name` String,
-    `schema_name` String,
-    `schema_owner` String,
-    `default_character_set_catalog` Nullable(String),
-    `default_character_set_schema` Nullable(String),
-    `default_character_set_name` Nullable(String),
-    `sql_path` Nullable(String),
-    `CATALOG_NAME` String ALIAS catalog_name,
-    `SCHEMA_NAME` String ALIAS schema_name,
-    `SCHEMA_OWNER` String ALIAS schema_owner,
-    `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog,
-    `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema,
-    `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name,
-    `SQL_PATH` Nullable(String) ALIAS sql_path
+    (
+     `catalog_name` String,
+     `schema_name` String,
+     `schema_owner` String,
+     `default_character_set_catalog` Nullable(String),
+     `default_character_set_schema` Nullable(String),
+     `default_character_set_name` Nullable(String),
+     `sql_path` Nullable(String),
+     `CATALOG_NAME` String ALIAS catalog_name,
+     `SCHEMA_NAME` String ALIAS schema_name,
+     `SCHEMA_OWNER` String ALIAS schema_owner,
+     `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog,
+     `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema,
+     `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name,
+     `SQL_PATH` Nullable(String) ALIAS sql_path
 ) AS
-SELECT
-    name AS catalog_name,
-    name AS schema_name,
-    'default' AS schema_owner,
-    NULL AS default_character_set_catalog,
-    NULL AS default_character_set_schema,
-    NULL AS default_character_set_name,
-    NULL AS sql_path
+SELECT name         AS `catalog_name`,
+       name         AS `schema_name`,
+       'default'    AS `schema_owner`,
+       NULL         AS `default_character_set_catalog`,
+       NULL         AS `default_character_set_schema`,
+       NULL         AS `default_character_set_name`,
+       NULL         AS `sql_path`
 FROM system.databases
diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql
index 8eea3713923..90b70332dbf 100644
--- a/src/Storages/System/InformationSchema/tables.sql
+++ b/src/Storages/System/InformationSchema/tables.sql
@@ -1,17 +1,28 @@
 ATTACH VIEW tables
-(
-    `table_catalog` String,
-    `table_schema` String,
-    `table_name` String,
-    `table_type` Enum8('BASE TABLE' = 1, 'VIEW' = 2, 'FOREIGN TABLE' = 3, 'LOCAL TEMPORARY' = 4, 'SYSTEM VIEW' = 5),
-    `TABLE_CATALOG` String ALIAS table_catalog,
-    `TABLE_SCHEMA` String ALIAS table_schema,
-    `TABLE_NAME` String ALIAS table_name,
-    `TABLE_TYPE` Enum8('BASE TABLE' = 1, 'VIEW' = 2, 'FOREIGN TABLE' = 3, 'LOCAL TEMPORARY' = 4, 'SYSTEM VIEW' = 5) ALIAS table_type
-) AS
-SELECT
-    database AS table_catalog,
-    database AS table_schema,
-    name AS table_name,
-    multiIf(is_temporary, 4, engine like '%View', 2, engine LIKE 'System%', 5, has_own_data = 0, 3, 1) AS table_type
+    (
+     `table_catalog` String,
+     `table_schema` String,
+     `table_name` String,
+     `table_type` String,
+     `table_comment` String,
+     `table_collation` String,
+     `TABLE_CATALOG` String ALIAS table_catalog,
+     `TABLE_SCHEMA` String ALIAS table_schema,
+     `TABLE_NAME` String ALIAS table_name,
+     `TABLE_TYPE` String ALIAS table_type,
+     `TABLE_COMMENT` String ALIAS table_comment,
+     `TABLE_COLLATION` String ALIAS table_collation
+        ) AS
+SELECT database             AS `table_catalog`,
+       database             AS `table_schema`,
+       name                 AS `table_name`,
+       comment              AS `table_comment`,
+       multiIf(
+               is_temporary, 'LOCAL TEMPORARY',
+               engine LIKE '%View', 'VIEW',
+               engine LIKE 'System%', 'SYSTEM VIEW',
+               has_own_data = 0, 'FOREIGN TABLE',
+               'BASE TABLE'
+           )                AS `table_type`,
+       'utf8mb4'            AS `table_collation`
 FROM system.tables
diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp
index 074a648d235..279720907c3 100644
--- a/src/Storages/System/attachInformationSchemaTables.cpp
+++ b/src/Storages/System/attachInformationSchemaTables.cpp
@@ -12,7 +12,8 @@ INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema
 INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
 INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
 INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");
-
+INCBIN(resource_key_column_usage_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/key_column_usage.sql");
+INCBIN(resource_referential_constraints_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/referential_constraints.sql");
 
 namespace DB
 {
@@ -66,6 +67,17 @@ void attachInformationSchema(ContextMutablePtr context, IDatabase & information_
     createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
     createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
     createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
+    createInformationSchemaView(
+        context,
+        information_schema_database,
+        "key_column_usage",
+        std::string_view(reinterpret_cast<const char *>(gresource_key_column_usage_sqlData), gresource_key_column_usage_sqlSize));
+    createInformationSchemaView(
+        context,
+        information_schema_database,
+        "referential_constraints",
+        std::string_view(
+            reinterpret_cast<const char *>(gresource_referential_constraints_sqlData), gresource_referential_constraints_sqlSize));
 }
 
 }

From c5f3170a8294ef305d8293a89687ced09cc49d82 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Mon, 18 Sep 2023 18:12:55 +0000
Subject: [PATCH 096/243] update actual amount of tokens in bucket

---
 src/IO/Resource/ThrottlerConstraint.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
index 763cd25cf9c..3418306109e 100644
--- a/src/IO/Resource/ThrottlerConstraint.h
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -125,7 +125,9 @@ public:
 
     double getTokens() const
     {
-        return tokens;
+        auto now = event_queue->now();
+        double elapsed = std::chrono::nanoseconds(now - last_update).count() / 1e9;
+        return std::min(tokens + max_speed * elapsed, max_burst);
     }
 
     std::chrono::nanoseconds getThrottlingDuration() const

From 04d0b881ea12e18eb4edd6978f88f6e4718a42f4 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Mon, 18 Sep 2023 18:38:05 +0000
Subject: [PATCH 097/243] more docs

---
 docs/en/operations/workload-scheduling.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md
index e061a2ab413..41efd68c34d 100644
--- a/docs/en/operations/workload-scheduling.md
+++ b/docs/en/operations/workload-scheduling.md
@@ -84,6 +84,8 @@ graph TD
 * `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
 * `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
 
+To be able to use the full capacity of the underlying resource, you should use `inflight_limit`. Note that a low number of `max_requests` or `max_cost` could lead to not full resource utilization, while too high numbers could lead to empty queues inside the scheduler, which in turn will result in policies being ignored (unfairness or ignoring of priorities) in the subtree. On the other hand, if you want to protect resources from too high utilization, you should use `bandwidth_limit`. It throttles when the amount of resource consumed in `duration` seconds exceeds `max_burst + max_speed * duration` bytes. Two `bandwidth_limit` nodes on the same resource could be used to limit peak bandwidth during short intervals and average bandwidth for longer ones.
+
 The following example shows how to define IO scheduling hierarchies shown in the picture:
 
 ```xml

From 9a0d07c682b7ad55ccc7b418454ed10b5a1ce483 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Mon, 18 Sep 2023 22:14:53 +0200
Subject: [PATCH 098/243] Update information_schema docs

---
 .../system-tables/information_schema.md       | 85 ++++++++++++++++---
 .../InformationSchema/key_column_usage.sql    | 36 +++++---
 .../referential_constraints.sql               | 30 +++++--
 .../System/attachInformationSchemaTables.cpp  | 13 +--
 4 files changed, 125 insertions(+), 39 deletions(-)

diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index ee03441b9c1..a532300d336 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -13,16 +13,20 @@ SHOW TABLES FROM information_schema;
 ```
 
 ``` text
-┌─name─────┐
-│ COLUMNS  │
-│ SCHEMATA │
-│ TABLES   │
-│ VIEWS    │
-│ columns  │
-│ schemata │
-│ tables   │
-│ views    │
-└──────────┘
+┌─name────────────────────┐
+│ COLUMNS                 │
+│ KEY_COLUMN_USAGE        │
+│ REFERENTIAL_CONSTRAINTS │
+│ SCHEMATA                │
+│ TABLES                  │
+│ VIEWS                   │
+│ columns                 │
+│ key_column_usage        │
+│ referential_constraints │
+│ schemata                │
+│ tables                  │
+│ views                   │
+└─────────────────────────┘
 ```
 
 `INFORMATION_SCHEMA` contains the following views:
@@ -31,6 +35,8 @@ SHOW TABLES FROM information_schema;
 - [SCHEMATA](#schemata)
 - [TABLES](#tables)
 - [VIEWS](#views)
+- [KEY_COLUMN_USAGE](#key_column_usage)
+- [REFERENTIAL_CONSTRAINTS](#referential_constraints)
 
 Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases.
 
@@ -218,3 +224,62 @@ is_trigger_updatable:       NO
 is_trigger_deletable:       NO
 is_trigger_insertable_into: NO
 ```
+
+## KEY_COLUMN_USAGE (#key_column_usage)
+
+It was added for compatibility with third party tools such as Tableau Online. Contains only the primary keys columns read from [system.columns](../../operations/system-tables/columns.md).
+
+Columns:
+
+- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the constraint belongs. This value is always `def`.
+- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the constraint belongs.
+- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the constraint.
+- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the table belongs. This value is always `def`.
+- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the table belongs.
+- `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table that has the constraint.
+- `column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column that has the constraint.
+- `ordinal_position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The column's position within the constraint, not the column's position within the table. Column positions are numbered beginning with 1.
+- `position_in_unique_constraint` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Always `NULL`.
+- `referenced_table_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the schema referenced by the constraint. Always `NULL`.
+- `referenced_table_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the table referenced by the constraint. Always `NULL`.
+- `referenced_column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column referenced by the constraint. Always `NULL`.
+
+**Example**
+
+```sql
+CREATE TABLE test (i UInt32, s String) ENGINE MergeTree ORDER BY i;
+SELECT * FROM information_schema.key_column_usage WHERE table_name = 'test' FORMAT Vertical;
+```
+
+Result:
+
+```
+Row 1:
+──────
+referenced_table_schema: ᴺᵁᴸᴸ
+referenced_table_name:   ᴺᵁᴸᴸ
+referenced_column_name:  ᴺᵁᴸᴸ
+table_schema:            default
+table_name:              test
+column_name:             i
+ordinal_position:        1
+constraint_name:         PRIMARY
+```
+
+## REFERENTIAL_CONSTRAINTS (#referential_constraints)
+
+It was added for compatibility with third party tools such as Tableau Online. Reads no data by design, selects from this view will always yield an empty result set.
+
+Columns:
+
+- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the constraint belongs.
+- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the constraint belongs.
+- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the constraint.
+- `unique_constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog containing the unique constraint that the constraint references.
+- `unique_constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema containing the unique constraint that the constraint references.
+- `unique_constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the unique constraint that the constraint references.
+- `match_option` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `MATCH` attribute.
+- `update_rule` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `ON UPDATE` attribute.
+- `delete_rule` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `ON DELETE` attribute.
+- `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table.
+- `referenced_table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table referenced by the constraint.
diff --git a/src/Storages/System/InformationSchema/key_column_usage.sql b/src/Storages/System/InformationSchema/key_column_usage.sql
index 98b8e9de63e..bd8ad27f567 100644
--- a/src/Storages/System/InformationSchema/key_column_usage.sql
+++ b/src/Storages/System/InformationSchema/key_column_usage.sql
@@ -1,29 +1,41 @@
 ATTACH VIEW key_column_usage
     (
-     `referenced_table_schema` Nullable(String),
-     `referenced_table_name` Nullable(String),
-     `referenced_column_name` Nullable(String),
+     `constraint_catalog` String,
+     `constraint_schema` String,
+     `constraint_name` Nullable(String),
+     `table_catalog` String,
      `table_schema` String,
      `table_name` String,
      `column_name` Nullable(String),
      `ordinal_position` UInt32,
-     `constraint_name` Nullable(String),
-     `REFERENCED_TABLE_SCHEMA` Nullable(String) ALIAS referenced_table_schema,
-     `REFERENCED_TABLE_NAME` Nullable(String) ALIAS referenced_table_name,
-     `REFERENCED_COLUMN_NAME` Nullable(String) ALIAS referenced_column_name,
+     `position_in_unique_constraint` Nullable(UInt32),
+     `referenced_table_schema` Nullable(String),
+     `referenced_table_name` Nullable(String),
+     `referenced_column_name` Nullable(String),
+     `CONSTRAINT_CATALOG` Nullable(String) ALIAS constraint_catalog, 
+     `CONSTRAINT_SCHEMA` Nullable(String) ALIAS constraint_schema,
+     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
+     `TABLE_CATALOG` String ALIAS table_catalog,
      `TABLE_SCHEMA` String ALIAS table_schema,
      `TABLE_NAME` String ALIAS table_name,
      `COLUMN_NAME` Nullable(String) ALIAS column_name,
      `ORDINAL_POSITION` UInt32 ALIAS ordinal_position,
-     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name
+     `POSITION_IN_UNIQUE_CONSTRAINT` Nullable(UInt32) ALIAS position_in_unique_constraint,
+     `REFERENCED_TABLE_SCHEMA` Nullable(String) ALIAS referenced_table_schema,
+     `REFERENCED_TABLE_NAME` Nullable(String) ALIAS referenced_table_name,
+     `REFERENCED_COLUMN_NAME` Nullable(String) ALIAS referenced_column_name
 ) AS
-SELECT NULL      AS `referenced_table_schema`,
-       NULL      AS `referenced_table_name`,
-       NULL      AS `referenced_column_name`,
+SELECT 'def'     AS `constraint_catalog`,
+       database  AS `constraint_schema`,
+       'PRIMARY' AS `constraint_name`,
+       'def'     AS `table_catalog`,
        database  AS `table_schema`,
        table     AS `table_name`,
        name      AS `column_name`,
        position  AS `ordinal_position`,
-       'PRIMARY' AS `constraint_name`
+       NULL      AS `position_in_unique_constraint`,
+       NULL      AS `referenced_table_schema`,
+       NULL      AS `referenced_table_name`,
+       NULL      AS `referenced_column_name`
 FROM system.columns
 WHERE is_in_primary_key;
diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql
index 73744bf59ac..b33b5a80ec7 100644
--- a/src/Storages/System/InformationSchema/referential_constraints.sql
+++ b/src/Storages/System/InformationSchema/referential_constraints.sql
@@ -1,19 +1,37 @@
 ATTACH VIEW referential_constraints
     (
-     `constraint_name` Nullable(String),
+     `constraint_catalog` String,
      `constraint_schema` String,
-     `table_name` String,
+     `constraint_name` Nullable(String),
+     `unique_constraint_catalog` String,
+     `unique_constraint_schema` String,
+     `unique_constraint_name` Nullable(String),
+     `match_option` String,
      `update_rule` String,
      `delete_rule` String,
-     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
+     `table_name` String,
+     `referenced_table_name` String,
+     `CONSTRAINT_CATALOG` String ALIAS constraint_catalog,
      `CONSTRAINT_SCHEMA` String ALIAS constraint_schema,
-     `TABLE_NAME` String ALIAS table_name,
+     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
+     `UNIQUE_CONSTRAINT_CATALOG` String ALIAS unqiue_constraint_catalog,
+     `UNIQUE_CONSTRAINT_SCHEMA` String ALIAS unqiue_constraint_schema,
+     `UNIQUE_CONSTRAINT_NAME` Nullable(String) ALIAS unqiue_constraint_name,
+     `MATCH_OPTION` String ALIAS match_option,
      `UPDATE_RULE` String ALIAS update_rule,
      `DELETE_RULE` String ALIAS delete_rule
+     `TABLE_NAME` String ALIAS table_name,
+     `REFERENCED_TABLE_NAME` String ALIAS referenced_table_name
 ) AS
-SELECT NULL AS `constraint_name`,
+SELECT ''   AS `constraint_catalog`,
+       NULL AS `constraint_name`,
        ''   AS `constraint_schema`,
-       ''   AS `table_name`,
+       ''   AS `unique_constraint_catalog`,
+       NULL AS `unique_constraint_name`,
+       ''   AS `unique_constraint_schema`,
+       ''   AS `match_option`,
        ''   AS `update_rule`,
        ''   AS `delete_rule`
+       ''   AS `table_name`,
+       ''   AS `referenced_table_name`
 WHERE false; -- make sure this view is always empty
diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp
index 279720907c3..d4775bf0d4a 100644
--- a/src/Storages/System/attachInformationSchemaTables.cpp
+++ b/src/Storages/System/attachInformationSchemaTables.cpp
@@ -67,17 +67,8 @@ void attachInformationSchema(ContextMutablePtr context, IDatabase & information_
     createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
     createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
     createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
-    createInformationSchemaView(
-        context,
-        information_schema_database,
-        "key_column_usage",
-        std::string_view(reinterpret_cast<const char *>(gresource_key_column_usage_sqlData), gresource_key_column_usage_sqlSize));
-    createInformationSchemaView(
-        context,
-        information_schema_database,
-        "referential_constraints",
-        std::string_view(
-            reinterpret_cast<const char *>(gresource_referential_constraints_sqlData), gresource_referential_constraints_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "key_column_usage", std::string_view(reinterpret_cast<const char *>(gresource_key_column_usage_sqlData), gresource_key_column_usage_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "referential_constraints", std::string_view(reinterpret_cast<const char *>(gresource_referential_constraints_sqlData), gresource_referential_constraints_sqlSize));
 }
 
 }

From ad7b707658dbc313bcec84f13f630dbda28c295e Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Mon, 18 Sep 2023 22:30:27 +0200
Subject: [PATCH 099/243] Update information_schema.tables sql and docs

---
 .../system-tables/information_schema.md       |  4 +-
 .../System/InformationSchema/tables.sql       | 52 +++++++++----------
 2 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index a532300d336..b990a312019 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -153,12 +153,14 @@ Columns:
 - `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
 - `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
 - `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
-- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values:
+- `table_type` ([String](../../sql-reference/data-types/string.md)) — Table type. Possible values:
     - `BASE TABLE`
     - `VIEW`
     - `FOREIGN TABLE`
     - `LOCAL TEMPORARY`
     - `SYSTEM VIEW`
+- `table_comment` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The comment used when creating the table.
+- `table_collation` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The table default collation. Always `utf8mb4`.
 
 **Example**
 
diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql
index 90b70332dbf..34dbd108342 100644
--- a/src/Storages/System/InformationSchema/tables.sql
+++ b/src/Storages/System/InformationSchema/tables.sql
@@ -1,28 +1,28 @@
 ATTACH VIEW tables
-    (
-     `table_catalog` String,
-     `table_schema` String,
-     `table_name` String,
-     `table_type` String,
-     `table_comment` String,
-     `table_collation` String,
-     `TABLE_CATALOG` String ALIAS table_catalog,
-     `TABLE_SCHEMA` String ALIAS table_schema,
-     `TABLE_NAME` String ALIAS table_name,
-     `TABLE_TYPE` String ALIAS table_type,
-     `TABLE_COMMENT` String ALIAS table_comment,
-     `TABLE_COLLATION` String ALIAS table_collation
-        ) AS
-SELECT database             AS `table_catalog`,
-       database             AS `table_schema`,
-       name                 AS `table_name`,
-       comment              AS `table_comment`,
-       multiIf(
-               is_temporary, 'LOCAL TEMPORARY',
-               engine LIKE '%View', 'VIEW',
-               engine LIKE 'System%', 'SYSTEM VIEW',
-               has_own_data = 0, 'FOREIGN TABLE',
-               'BASE TABLE'
-           )                AS `table_type`,
-       'utf8mb4'            AS `table_collation`
+(
+    `table_catalog` String,
+    `table_schema` String,
+    `table_name` String,
+    `table_type` String,
+    `table_comment` Nullable(String),
+    `table_collation` Nullable(String),
+    `TABLE_CATALOG` String ALIAS table_catalog,
+    `TABLE_SCHEMA` String ALIAS table_schema,
+    `TABLE_NAME` String ALIAS table_name,
+    `TABLE_TYPE` String ALIAS table_type,
+    `TABLE_COMMENT` Nullable(String) ALIAS table_comment,
+    `TABLE_COLLATION` Nullable(String) ALIAS table_collation
+) AS
+SELECT
+    database  AS table_catalog,
+    database  AS table_schema,
+    name      AS table_name,
+    comment   AS table_comment,
+    multiIf(is_temporary,          'LOCAL TEMPORARY', 
+            engine LIKE '%View',   'VIEW', 
+            engine LIKE 'System%', 'SYSTEM VIEW', 
+            has_own_data = 0,      'FOREIGN TABLE', 
+            'BASE TABLE'
+            ) AS table_type,
+    'utf8mb4' AS table_collation
 FROM system.tables

From d91a656dd4f4b0cbb5d41f681fd5f00aa0cb9fd7 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Mon, 18 Sep 2023 22:33:16 +0200
Subject: [PATCH 100/243] Revert schemata.sql format

---
 .../System/InformationSchema/schemata.sql     | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/src/Storages/System/InformationSchema/schemata.sql b/src/Storages/System/InformationSchema/schemata.sql
index 449e28769db..9686fcbf4fa 100644
--- a/src/Storages/System/InformationSchema/schemata.sql
+++ b/src/Storages/System/InformationSchema/schemata.sql
@@ -1,25 +1,26 @@
 ATTACH VIEW schemata
-    (
-     `catalog_name` String,
-     `schema_name` String,
-     `schema_owner` String,
-     `default_character_set_catalog` Nullable(String),
-     `default_character_set_schema` Nullable(String),
-     `default_character_set_name` Nullable(String),
-     `sql_path` Nullable(String),
-     `CATALOG_NAME` String ALIAS catalog_name,
-     `SCHEMA_NAME` String ALIAS schema_name,
-     `SCHEMA_OWNER` String ALIAS schema_owner,
-     `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog,
-     `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema,
-     `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name,
-     `SQL_PATH` Nullable(String) ALIAS sql_path
+(
+    `catalog_name` String,
+    `schema_name` String,
+    `schema_owner` String,
+    `default_character_set_catalog` Nullable(String),
+    `default_character_set_schema` Nullable(String),
+    `default_character_set_name` Nullable(String),
+    `sql_path` Nullable(String),
+    `CATALOG_NAME` String ALIAS catalog_name,
+    `SCHEMA_NAME` String ALIAS schema_name,
+    `SCHEMA_OWNER` String ALIAS schema_owner,
+    `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog,
+    `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema,
+    `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name,
+    `SQL_PATH` Nullable(String) ALIAS sql_path
 ) AS
-SELECT name         AS `catalog_name`,
-       name         AS `schema_name`,
-       'default'    AS `schema_owner`,
-       NULL         AS `default_character_set_catalog`,
-       NULL         AS `default_character_set_schema`,
-       NULL         AS `default_character_set_name`,
-       NULL         AS `sql_path`
+SELECT
+    name AS catalog_name,
+    name AS schema_name,
+    'default' AS schema_owner,
+    NULL AS default_character_set_catalog,
+    NULL AS default_character_set_schema,
+    NULL AS default_character_set_name,
+    NULL AS sql_path
 FROM system.databases

From c3d12ce0e1baafec63398e19732e5f153afe46aa Mon Sep 17 00:00:00 2001
From: flynn <fenglv15@mails.ucas.ac.cn>
Date: Tue, 19 Sep 2023 08:18:37 +0000
Subject: [PATCH 101/243] update comment

---
 src/Storages/StorageMySQL.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h
index a98afc7ac4d..5303117cf5c 100644
--- a/src/Storages/StorageMySQL.h
+++ b/src/Storages/StorageMySQL.h
@@ -20,7 +20,6 @@ class NamedCollection;
 
 /** Implements storage in the MySQL database.
   * Use ENGINE = mysql(host_port, database_name, table_name, user_name, password)
-  * Read only.
   */
 class StorageMySQL final : public IStorage, WithContext
 {

From 142a4631f14f02ad198e9c67700f44ec1e001dae Mon Sep 17 00:00:00 2001
From: Smita Kulkarni <Smita.Kulkarni@clickhouse.com>
Date: Tue, 19 Sep 2023 11:22:10 +0200
Subject: [PATCH 102/243] Updated to clear current_row_sources when cleanedup
 rows are added

---
 .../Merges/Algorithms/ReplacingSortedAlgorithm.cpp         | 7 ++++++-
 .../00577_replacing_merge_tree_vertical_merge.sql          | 5 +++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
index 18f144bee3d..0c0598171b3 100644
--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
@@ -28,7 +28,6 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm(
     , cleanup(cleanup_)
     , cleanedup_rows_count(cleanedup_rows_count_)
 {
-
     if (!is_deleted_column.empty())
         is_deleted_column_number = header_.getPositionByName(is_deleted_column);
     if (!version_column.empty())
@@ -84,7 +83,10 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
                     if (!cleanup || !value)
                         insertRow();
                     else if (cleanup && cleanedup_rows_count != nullptr)
+                    {
                         *cleanedup_rows_count += current_row_sources.size();
+                        current_row_sources.resize(0);
+                    }
                 }
                 else
                     insertRow();
@@ -142,7 +144,10 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
             if (!cleanup || !value)
                 insertRow();
             else if (cleanup && cleanedup_rows_count != nullptr)
+            {
                 *cleanedup_rows_count += current_row_sources.size();
+                current_row_sources.resize(0);
+            }
         }
         else
             insertRow();
diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
index 931297fdd3b..e3c1bb10426 100644
--- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
+++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql
@@ -7,7 +7,7 @@ create table tab_00577 (date Date, version UInt64, val UInt64) engine = Replacin
 insert into tab_00577 values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1);
 insert into tab_00577 values ('2018-01-01', 0, 0);
 select * from tab_00577 order by version;
-OPTIMIZE TABLE tab_00577;
+OPTIMIZE TABLE tab_00577 FINAL CLEANUP;
 select * from tab_00577;
 drop table tab_00577;
 
@@ -15,7 +15,8 @@ drop table tab_00577;
 DROP TABLE IF EXISTS testCleanupR1;
 CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8)
     ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted)
-    ORDER BY uid SETTINGS enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0;
+    ORDER BY uid SETTINGS enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0,
+    min_bytes_for_wide_part = 0;
 INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0);
 INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1);
 INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1);

From 834ec7b3823a8a36ea283e30adfacc82026cf3ed Mon Sep 17 00:00:00 2001
From: lgbo-ustc <lgbo.ustc@gmail.com>
Date: Thu, 14 Sep 2023 09:26:30 +0800
Subject: [PATCH 103/243] reuse GeneratorJSONPath

---
 src/Functions/FunctionSQLJSON.h                    | 14 +++++++-------
 .../JSONPath/Generator/GeneratorJSONPath.h         | 10 ++++++++++
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h
index 55ffd817bb1..1786b613f98 100644
--- a/src/Functions/FunctionSQLJSON.h
+++ b/src/Functions/FunctionSQLJSON.h
@@ -199,6 +199,7 @@ public:
 
             /// Parse JSON for every row
             Impl impl;
+            GeneratorJSONPath<JSONParser> generator_json_path(res);
             for (const auto i : collections::range(0, input_rows_count))
             {
                 std::string_view json{
@@ -208,7 +209,9 @@ public:
                 bool added_to_column = false;
                 if (document_ok)
                 {
-                    added_to_column = impl.insertResultToColumn(*to, document, res, context);
+                    // Instead of creating a new generator for each row, we can reuse the same one.
+                    generator_json_path.reinitialize();
+                    added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, context);
                 }
                 if (!added_to_column)
                 {
@@ -287,9 +290,8 @@ public:
 
     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
 
-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath<JSONParser> & generator_json_path, const ContextPtr &)
     {
-        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
         VisitorStatus status;
         while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
@@ -337,9 +339,8 @@ public:
 
     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
 
-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr & context)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath<JSONParser> & generator_json_path, const ContextPtr & context)
     {
-        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
         VisitorStatus status;
 
@@ -405,11 +406,10 @@ public:
 
     static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; }
 
-    static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
+    static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath<JSONParser> & generator_json_path, const ContextPtr &)
     {
         ColumnString & col_str = assert_cast<ColumnString &>(dest);
 
-        GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
         Element current_element = root;
         VisitorStatus status;
         bool success = false;
diff --git a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
index be02656b07d..de62be98d31 100644
--- a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
+++ b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h
@@ -105,6 +105,16 @@ public:
         }
     }
 
+    void reinitialize()
+    {
+        while (current_visitor >= 0)
+        {
+            visitors[current_visitor]->reinitialize();
+            current_visitor--;
+        }
+        current_visitor = 0;
+    }
+
 private:
     bool updateVisitorsForNextRun()
     {

From b2ed38742ccc45efcf3d91e9f6def8154c089473 Mon Sep 17 00:00:00 2001
From: Sergei Trifonov <sergei@clickhouse.com>
Date: Tue, 19 Sep 2023 11:32:05 +0200
Subject: [PATCH 104/243] Update src/IO/ISchedulerNode.h

Co-authored-by: vdimir <vdimir@clickhouse.com>
---
 src/IO/ISchedulerNode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h
index 628dfd53e41..126daeb1ee1 100644
--- a/src/IO/ISchedulerNode.h
+++ b/src/IO/ISchedulerNode.h
@@ -139,7 +139,7 @@ public:
             if (i->id == postponed_id)
             {
                 postponed.erase(i);
-                // It is O(n), but we do not expect neither big heaps, nor frequent cancels. So it is fine.
+                // It is O(n), but we do not expect either big heaps or frequent cancels. So it is fine.
                 std::make_heap(postponed.begin(), postponed.end());
                 return true;
             }

From c7ddbab9bc49757e41b737e85cbea8e1176a47bb Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 19 Sep 2023 13:06:19 +0300
Subject: [PATCH 105/243] Updated implementation

---
 src/DataTypes/Utils.cpp                 | 231 ++++++++++++++++++++++++
 src/DataTypes/Utils.h                   |  19 ++
 src/Functions/FunctionHelpers.cpp       |   1 -
 src/Storages/MergeTree/KeyCondition.cpp |  34 +++-
 4 files changed, 275 insertions(+), 10 deletions(-)
 create mode 100644 src/DataTypes/Utils.cpp
 create mode 100644 src/DataTypes/Utils.h

diff --git a/src/DataTypes/Utils.cpp b/src/DataTypes/Utils.cpp
new file mode 100644
index 00000000000..0168c6d256c
--- /dev/null
+++ b/src/DataTypes/Utils.cpp
@@ -0,0 +1,231 @@
+#include <DataTypes/Utils.h>
+#include <DataTypes/DataTypesDecimal.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeMap.h>
+#include <DataTypes/DataTypeTuple.h>
+
+namespace DB
+{
+
+bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_type)
+{
+    auto from_which_type = WhichDataType(from_type->getTypeId());
+    bool to_type_was_nullable = isNullableOrLowCardinalityNullable(to_type);
+    auto to_type_unwrapped = removeNullable(removeLowCardinality(to_type));
+
+    if (from_type->equals(*to_type))
+        return true;
+
+    auto to_which_type = WhichDataType(to_type->getTypeId());
+
+    switch (from_which_type.idx)
+    {
+        case TypeIndex::UInt8:
+        case TypeIndex::UInt16:
+        case TypeIndex::UInt32:
+        case TypeIndex::UInt64:
+        case TypeIndex::UInt128:
+        case TypeIndex::UInt256:
+        {
+            if (to_which_type.isUInt() &&
+                to_type_unwrapped->getSizeOfValueInMemory() >= from_type->getSizeOfValueInMemory())
+                return true;
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Int8:
+        case TypeIndex::Int16:
+        case TypeIndex::Int32:
+        case TypeIndex::Int64:
+        case TypeIndex::Int128:
+        case TypeIndex::Int256:
+        {
+            if (to_which_type.isInt() &&
+                to_type_unwrapped->getSizeOfValueInMemory() >= from_type->getSizeOfValueInMemory())
+                return true;
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Float32:
+        {
+            if (to_which_type.isFloat64() || to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Float64:
+        case TypeIndex::Date:
+        case TypeIndex::Date32:
+        case TypeIndex::DateTime:
+        case TypeIndex::DateTime64:
+        case TypeIndex::FixedString:
+        case TypeIndex::Enum8:
+        case TypeIndex::Enum16:
+        case TypeIndex::IPv6:
+        {
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Decimal32:
+        case TypeIndex::Decimal64:
+        case TypeIndex::Decimal128:
+        case TypeIndex::Decimal256:
+        {
+            if (to_which_type.isDecimal())
+            {
+                auto from_type_decimal_precision = getDecimalPrecision(*from_type);
+                auto to_type_decimal_precision = getDecimalPrecision(*to_type_unwrapped);
+                if (from_type_decimal_precision > to_type_decimal_precision)
+                    return false;
+
+                auto from_type_decimal_scale = getDecimalScale(*from_type);
+                auto to_type_decimal_scale = getDecimalScale(*to_type_unwrapped);
+                if (from_type_decimal_scale > to_type_decimal_scale)
+                    return false;
+
+                return true;
+            }
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::UUID:
+        {
+            if (to_which_type.isUInt128() || to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::IPv4:
+        {
+            if (to_which_type.isUInt32() || to_which_type.isUInt64() || to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Nullable:
+        {
+            if (to_type_was_nullable)
+            {
+                const auto & from_type_nullable = assert_cast<const DataTypeNullable &>(*from_type);
+                return canBeSafelyCasted(from_type_nullable.getNestedType(), to_type_unwrapped);
+            }
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::LowCardinality:
+        {
+            const auto & from_type_low_cardinality = assert_cast<const DataTypeLowCardinality &>(*from_type);
+            return canBeSafelyCasted(from_type_low_cardinality.getDictionaryType(), to_type_unwrapped);
+        }
+        case TypeIndex::Array:
+        {
+            if (to_which_type.isArray())
+            {
+                const auto & from_type_array = assert_cast<const DataTypeArray &>(*from_type);
+                const auto & to_type_array = assert_cast<const DataTypeArray &>(*to_type_unwrapped);
+                return canBeSafelyCasted(from_type_array.getNestedType(), to_type_array.getNestedType());
+            }
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Map:
+        {
+            if (to_which_type.isMap())
+            {
+                const auto & from_type_map = assert_cast<const DataTypeMap &>(*from_type);
+                const auto & to_type_map = assert_cast<const DataTypeMap &>(*to_type_unwrapped);
+                if (!canBeSafelyCasted(from_type_map.getKeyType(), to_type_map.getKeyType()))
+                    return false;
+
+                if (!canBeSafelyCasted(from_type_map.getValueType(), to_type_map.getValueType()))
+                    return false;
+
+                return true;
+            }
+
+            if (to_which_type.isArray())
+            {
+                // Map nested type is Array(Tuple(key_type, value_type))
+                const auto & from_type_map = assert_cast<const DataTypeMap &>(*from_type);
+                const auto & to_type_array = assert_cast<const DataTypeArray &>(*to_type_unwrapped);
+                const auto * to_type_nested_tuple_type = typeid_cast<const DataTypeTuple *>(to_type_array.getNestedType().get());
+                if (!to_type_nested_tuple_type)
+                    return false;
+
+                const auto & to_type_tuple_elements = to_type_nested_tuple_type->getElements();
+                if (to_type_tuple_elements.size() != 2)
+                    return false;
+
+                if (!canBeSafelyCasted(from_type_map.getKeyType(), to_type_tuple_elements[0]))
+                    return false;
+
+                if (!canBeSafelyCasted(from_type_map.getValueType(), to_type_tuple_elements[1]))
+                    return false;
+
+                return true;
+            }
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::Tuple:
+        {
+            if (to_which_type.isTuple())
+            {
+                const auto & from_type_tuple = assert_cast<const DataTypeTuple &>(*from_type);
+                const auto & to_type_tuple = assert_cast<const DataTypeTuple &>(*to_type_unwrapped);
+
+                const auto & from_tuple_type_elements = from_type_tuple.getElements();
+                const auto & to_tuple_type_elements = to_type_tuple.getElements();
+
+                size_t lhs_type_elements_size = from_tuple_type_elements.size();
+                if (lhs_type_elements_size != to_tuple_type_elements.size())
+                    return false;
+
+                for (size_t i = 0; i < lhs_type_elements_size; ++i)
+                    if (!canBeSafelyCasted(from_tuple_type_elements[i], to_tuple_type_elements[i]))
+                        return false;
+
+                return true;
+            }
+
+            if (to_which_type.isString())
+                return true;
+
+            return false;
+        }
+        case TypeIndex::String:
+        case TypeIndex::Object:
+        case TypeIndex::Set:
+        case TypeIndex::Interval:
+        case TypeIndex::Function:
+        case TypeIndex::AggregateFunction:
+        case TypeIndex::Nothing:
+            return false;
+    }
+
+    return true;
+}
+
+}
diff --git a/src/DataTypes/Utils.h b/src/DataTypes/Utils.h
new file mode 100644
index 00000000000..bee109f4524
--- /dev/null
+++ b/src/DataTypes/Utils.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+
+/** Returns true if from_type can be safely casted to to_type.
+  *
+  * Examples:
+  * From type UInt8 to type UInt16 returns true.
+  * From type UInt16 to type UInt8 returns false.
+  * From type String to type LowCardinality(String) returns true.
+  * From type LowCardinality(String) to type String returns true.
+  * From type String to type UInt8 returns false.
+  */
+bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_type);
+
+}
diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp
index 7a9817ad344..6d3c20ef2ca 100644
--- a/src/Functions/FunctionHelpers.cpp
+++ b/src/Functions/FunctionHelpers.cpp
@@ -6,7 +6,6 @@
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnLowCardinality.h>
 #include <Common/assert_cast.h>
-#include <DataTypes/DataTypeNullable.h>
 
 
 namespace DB
diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 47521b9887b..7abd708a944 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -3,6 +3,7 @@
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/FieldToDataType.h>
 #include <DataTypes/getLeastSupertype.h>
+#include <DataTypes/Utils.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/ExpressionActions.h>
@@ -1257,7 +1258,7 @@ bool KeyCondition::tryPrepareSetIndex(
     if (!future_set)
         return false;
 
-    const auto & set_types = future_set->getTypes();
+    const auto set_types = future_set->getTypes();
     size_t set_types_size = set_types.size();
     size_t indexes_mapping_size = indexes_mapping.size();
 
@@ -1283,24 +1284,37 @@ bool KeyCondition::tryPrepareSetIndex(
 
     for (size_t indexes_mapping_index = 0; indexes_mapping_index < indexes_mapping_size; ++indexes_mapping_index)
     {
+        const auto & key_column_type = data_types[indexes_mapping_index];
         size_t set_element_index = indexes_mapping[indexes_mapping_index].tuple_index;
-        const auto & set_element_type = set_types[set_element_index];
-        auto & set_column = set_columns[set_element_index];
+        auto set_element_type = set_types[set_element_index];
+        auto set_column = set_columns[set_element_index];
 
-        bool is_set_column_nullable = set_element_type->isNullable();
-        bool is_set_column_low_cardinality_nullable = set_element_type->isLowCardinalityNullable();
+        if (canBeSafelyCasted(set_element_type, key_column_type))
+        {
+            set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type);
+            continue;
+        }
+
+        if (!key_column_type->canBeInsideNullable())
+            return false;
 
         const NullMap * set_column_null_map = nullptr;
 
-        if (is_set_column_nullable || is_set_column_low_cardinality_nullable)
+        if (isNullableOrLowCardinalityNullable(set_element_type))
         {
-            if (is_set_column_low_cardinality_nullable)
+            if (WhichDataType(set_element_type).isLowCardinality())
+            {
+                set_element_type = removeLowCardinality(set_element_type);
                 set_column = set_column->convertToFullColumnIfLowCardinality();
+            }
 
-            set_column_null_map = &assert_cast<const ColumnNullable &>(*set_column).getNullMapData();
+            set_element_type = removeNullable(set_element_type);
+            const auto & set_column_nullable = assert_cast<const ColumnNullable &>(*set_column);
+            set_column_null_map = &set_column_nullable.getNullMapData();
+            set_column = set_column_nullable.getNestedColumnPtr();
         }
 
-        auto nullable_set_column = castColumnAccurateOrNull({set_column, set_element_type, {}}, data_types[indexes_mapping_index]);
+        auto nullable_set_column = castColumnAccurateOrNull({set_column, set_element_type, {}}, key_column_type);
         const auto & nullable_set_column_typed = assert_cast<const ColumnNullable &>(*nullable_set_column);
         const auto & nullable_set_column_null_map = nullable_set_column_typed.getNullMapData();
         size_t nullable_set_column_null_map_size = nullable_set_column_null_map.size();
@@ -1321,6 +1335,8 @@ bool KeyCondition::tryPrepareSetIndex(
 
             set_column = nullable_set_column_typed.getNestedColumn().filter(filter, 0);
         }
+
+        set_columns[set_element_index] = std::move(set_column);
     }
 
     out.set_index = std::make_shared<MergeTreeSetIndex>(set_columns, std::move(indexes_mapping));

From 71655cda8ea7486ea44e3dd6d8dbddfe62d0d94b Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Tue, 19 Sep 2023 10:08:09 +0000
Subject: [PATCH 106/243] add more tests

---
 .../tests/gtest_throttler_constraint.cpp      | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/src/IO/Resource/tests/gtest_throttler_constraint.cpp b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
index 4f12caec6c2..d3f69212e8b 100644
--- a/src/IO/Resource/tests/gtest_throttler_constraint.cpp
+++ b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
@@ -42,6 +42,43 @@ TEST(IOResourceThrottlerConstraint, LeakyBucketConstraint)
     t.consumed("A", 10);
 }
 
+TEST(IOResourceThrottlerConstraint, Unlimited)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
+    t.process(start, 0);
+
+    t.add<ThrottlerConstraint>("/", "");
+    t.add<FifoQueue>("/A", "");
+
+    for (int i = 0; i < 10; i++)
+    {
+        t.enqueue("/A", {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000});
+        t.process(start + std::chrono::seconds(i / 2)); // Stick to the same time twice
+        t.consumed("A", 11111111);
+    }
+}
+
+TEST(IOResourceThrottlerConstraint, Pacing)
+{
+    ResourceTest t;
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
+    t.process(start, 0);
+
+    // Zero burst allows you to send one request of any `size` and than throttle for `size/max_speed` seconds.
+    // Useful if outgoing traffic should be "paced", i.e. have the least possible burstiness.
+    t.add<ThrottlerConstraint>("/", "<max_burst>0</max_burst><max_speed>1</max_speed>");
+    t.add<FifoQueue>("/A", "");
+
+    t.enqueue("/A", {1, 2, 3, 1, 2, 1});
+    int output[] = {1, 2, 0, 3, 0, 0, 1, 2, 0, 1, 0};
+    for (int i = 0; i < std::size(output); i++)
+    {
+        t.process(start + std::chrono::seconds(i));
+        t.consumed("A", output[i]);
+    }
+}
+
 TEST(IOResourceThrottlerConstraint, BucketFilling)
 {
     ResourceTest t;

From d97b4f0685ce25e8119f4d1eceb762327c024f25 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Tue, 19 Sep 2023 10:09:16 +0000
Subject: [PATCH 107/243] better semantics for locking

---
 src/IO/ISchedulerNode.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/IO/ISchedulerNode.h b/src/IO/ISchedulerNode.h
index 628dfd53e41..0c89bb31788 100644
--- a/src/IO/ISchedulerNode.h
+++ b/src/IO/ISchedulerNode.h
@@ -165,12 +165,12 @@ public:
         std::unique_lock lock{mutex};
         if (!queue.empty())
         {
-            processQueue(lock);
+            processQueue(std::move(lock));
             return true;
         }
         if (!postponed.empty())
         {
-            processPostponed(lock);
+            processPostponed(std::move(lock));
             return true;
         }
         return false;
@@ -183,7 +183,7 @@ public:
         std::unique_lock lock{mutex};
         if (!queue.empty())
         {
-            processQueue(lock);
+            processQueue(std::move(lock));
             return true;
         }
         if (postponed.empty())
@@ -192,7 +192,7 @@ public:
         {
             if (postponed.front().key <= now())
             {
-                processPostponed(lock);
+                processPostponed(std::move(lock));
                 return true;
             }
             return false;
@@ -206,13 +206,13 @@ public:
         while (true)
         {
             if (!queue.empty())
-                return processQueue(lock);
+                return processQueue(std::move(lock));
             if (postponed.empty())
                 wait(lock);
             else
             {
                 if (postponed.front().key <= now())
-                    return processPostponed(lock);
+                    return processPostponed(std::move(lock));
                 waitUntil(lock, postponed.front().key);
             }
         }
@@ -256,7 +256,7 @@ private:
             pending.wait(lock);
     }
 
-    void processQueue(std::unique_lock<std::mutex> & lock)
+    void processQueue(std::unique_lock<std::mutex> && lock)
     {
         Event event = std::move(queue.front());
         queue.pop_front();
@@ -264,7 +264,7 @@ private:
         event();
     }
 
-    void processPostponed(std::unique_lock<std::mutex> & lock)
+    void processPostponed(std::unique_lock<std::mutex> && lock)
     {
         Event event = std::move(*postponed.front().event);
         std::pop_heap(postponed.begin(), postponed.end());

From 311db946405c21a33e06fe9275eb1827d2937ed7 Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Tue, 19 Sep 2023 10:10:21 +0000
Subject: [PATCH 108/243] default `max_burst` equals to `max_speed` as in
 Throttler

---
 docs/en/operations/system-tables/scheduler.md  |  4 ++--
 docs/en/operations/workload-scheduling.md      |  2 +-
 src/IO/Resource/ThrottlerConstraint.h          | 10 ++++++----
 src/Storages/System/StorageSystemScheduler.cpp |  8 ++++----
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md
index ca2d4be9642..953db4c28f2 100644
--- a/docs/en/operations/system-tables/scheduler.md
+++ b/docs/en/operations/system-tables/scheduler.md
@@ -38,8 +38,8 @@ inflight_requests: ᴺᵁᴸᴸ
 inflight_cost:     ᴺᵁᴸᴸ
 max_requests:      ᴺᵁᴸᴸ
 max_cost:          ᴺᵁᴸᴸ
-max_burst:         ᴺᵁᴸᴸ
 max_speed:         ᴺᵁᴸᴸ
+max_burst:         ᴺᵁᴸᴸ
 throttling_us:     ᴺᵁᴸᴸ
 tokens:            ᴺᵁᴸᴸ
 ```
@@ -66,7 +66,7 @@ Columns:
 - `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
 - `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
 - `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
-- `max_burst` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for `tokens` available in token-bucket throttler.
 - `max_speed` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for bandwidth in tokens per second.
+- `max_burst` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for `tokens` available in token-bucket throttler.
 - `throttling_us` (`Nullable(Int64)`) - For `bandwidth_limit` nodes only. Total number of microseconds this node was in throttling state.
 - `tokens` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Number of tokens currently available in token-bucket throttler.
diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md
index 41efd68c34d..24149099892 100644
--- a/docs/en/operations/workload-scheduling.md
+++ b/docs/en/operations/workload-scheduling.md
@@ -79,7 +79,7 @@ graph TD
 
 **Possible node types:**
 * `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
-* `bandwidth_limit` (constraint) - blocks if burst exceeds `max_burst` (default 0) or current bandwidth exceeds `max_speed` (0 means unlimited); must have a single child.
+* `bandwidth_limit` (constraint) - blocks if current bandwidth exceeds `max_speed` (0 means unlimited) or burst exceeds `max_burst` (by default equals `max_speed`); must have a single child.
 * `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
 * `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
 * `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
diff --git a/src/IO/Resource/ThrottlerConstraint.h b/src/IO/Resource/ThrottlerConstraint.h
index 3418306109e..7de53fe0728 100644
--- a/src/IO/Resource/ThrottlerConstraint.h
+++ b/src/IO/Resource/ThrottlerConstraint.h
@@ -18,10 +18,12 @@ namespace DB
 class ThrottlerConstraint : public ISchedulerConstraint
 {
 public:
+    static constexpr double default_burst_seconds = 1.0;
+
     ThrottlerConstraint(EventQueue * event_queue_, const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
         : ISchedulerConstraint(event_queue_, config, config_prefix)
-        , max_burst(config.getDouble(config_prefix + ".max_burst", 0))
         , max_speed(config.getDouble(config_prefix + ".max_speed", 0))
+        , max_burst(config.getDouble(config_prefix + ".max_burst", default_burst_seconds * max_speed))
         , last_update(event_queue_->now())
         , tokens(max_burst)
     {}
@@ -37,7 +39,7 @@ public:
         if (!ISchedulerNode::equals(other))
             return false;
         if (auto * o = dynamic_cast<ThrottlerConstraint *>(other))
-            return max_burst == o->max_burst && max_speed == o->max_speed;
+            return max_speed == o->max_speed && max_burst == o->max_burst;
         return false;
     }
 
@@ -137,7 +139,7 @@ public:
 
     std::pair<double, double> getParams() const
     {
-        return {max_burst, max_speed};
+        return {max_speed, max_burst};
     }
 
 private:
@@ -184,8 +186,8 @@ private:
         return satisfied() && child_active;
     }
 
-    const double max_burst{0}; /// in tokens
     const double max_speed{0}; /// in tokens per second
+    const double max_burst{0}; /// in tokens
 
     EventQueue::TimePoint last_update;
     UInt64 postponed = EventQueue::not_postponed;
diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp
index 9eb4ce82c54..0a6d8f91678 100644
--- a/src/Storages/System/StorageSystemScheduler.cpp
+++ b/src/Storages/System/StorageSystemScheduler.cpp
@@ -41,8 +41,8 @@ NamesAndTypesList StorageSystemScheduler::getNamesAndTypes()
         {"inflight_cost", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
         {"max_requests", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
         {"max_cost", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
-        {"max_burst", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
         {"max_speed", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
+        {"max_burst", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
         {"throttling_us", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
         {"tokens", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())},
     };
@@ -76,8 +76,8 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
         Field inflight_cost;
         Field max_requests;
         Field max_cost;
-        Field max_burst;
         Field max_speed;
+        Field max_burst;
         Field throttling_us;
         Field tokens;
 
@@ -101,7 +101,7 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
         }
         if (auto * ptr = dynamic_cast<ThrottlerConstraint *>(node.get()))
         {
-            std::tie(max_burst, max_speed) = ptr->getParams();
+            std::tie(max_speed, max_burst) = ptr->getParams();
             throttling_us = ptr->getThrottlingDuration().count() / 1000;
             tokens = ptr->getTokens();
         }
@@ -116,8 +116,8 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c
         res_columns[i++]->insert(inflight_cost);
         res_columns[i++]->insert(max_requests);
         res_columns[i++]->insert(max_cost);
-        res_columns[i++]->insert(max_burst);
         res_columns[i++]->insert(max_speed);
+        res_columns[i++]->insert(max_burst);
         res_columns[i++]->insert(throttling_us);
         res_columns[i++]->insert(tokens);
     });

From 14569b0f7bfd01deea4d6aa11ba76e3cc09eccee Mon Sep 17 00:00:00 2001
From: serxa <sergei@clickhouse.com>
Date: Tue, 19 Sep 2023 10:13:05 +0000
Subject: [PATCH 109/243] fix start time in test

---
 src/IO/Resource/tests/gtest_throttler_constraint.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/IO/Resource/tests/gtest_throttler_constraint.cpp b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
index d3f69212e8b..e6d83d0d0ca 100644
--- a/src/IO/Resource/tests/gtest_throttler_constraint.cpp
+++ b/src/IO/Resource/tests/gtest_throttler_constraint.cpp
@@ -144,8 +144,7 @@ TEST(IOResourceThrottlerConstraint, PeekAndAvgLimits)
 TEST(IOResourceThrottlerConstraint, ThrottlerAndFairness)
 {
     ResourceTest t;
-    EventQueue::TimePoint start;
-    start += EventQueue::Duration(1000000000);
+    EventQueue::TimePoint start = std::chrono::system_clock::now();
     t.process(start, 0);
 
     t.add<ThrottlerConstraint>("/", "<max_burst>100.0</max_burst><max_speed>10.0</max_speed>");

From 94d737264cb62016cef3d89dc3c73fce6a5c9fd8 Mon Sep 17 00:00:00 2001
From: priera <pedro.riera.marto@gmail.com>
Date: Thu, 31 Aug 2023 15:37:48 +0200
Subject: [PATCH 110/243] inserting non-duplicate chunks

---
 src/Processors/Sinks/IOutputChunkGenerator.h  | 22 +++++
 src/Processors/Sinks/OutputChunkGenerator.cpp | 80 +++++++++++++++++++
 src/Processors/Sinks/SinkToStorage.cpp        | 13 ++-
 src/Processors/Sinks/SinkToStorage.h          |  7 +-
 .../MergeTree/ReplicatedMergeTreeSink.cpp     | 24 ++----
 .../MergeTree/ReplicatedMergeTreeSink.h       | 12 ---
 ...erialized_views_duplicated_parts.reference | 14 ++++
 ...rt_materialized_views_duplicated_parts.sql | 44 ++++++++++
 8 files changed, 179 insertions(+), 37 deletions(-)
 create mode 100644 src/Processors/Sinks/IOutputChunkGenerator.h
 create mode 100644 src/Processors/Sinks/OutputChunkGenerator.cpp
 create mode 100644 tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.reference
 create mode 100644 tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.sql

diff --git a/src/Processors/Sinks/IOutputChunkGenerator.h b/src/Processors/Sinks/IOutputChunkGenerator.h
new file mode 100644
index 00000000000..3a92368df14
--- /dev/null
+++ b/src/Processors/Sinks/IOutputChunkGenerator.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <Processors/Chunk.h>
+#include <Interpreters/Context.h>
+
+namespace DB
+{
+
+class IOutputChunkGenerator {
+public:
+    static std::unique_ptr<IOutputChunkGenerator> createCopyRanges(ContextPtr context);
+    static std::unique_ptr<IOutputChunkGenerator> createDefault();
+
+    virtual ~IOutputChunkGenerator() = default;
+
+    virtual void onNewChunkArrived(Chunk chunk) = 0;
+    virtual void onRowsProcessed(size_t row_count, bool append) = 0;
+
+    virtual Chunk generateChunk() = 0;
+};
+
+}
diff --git a/src/Processors/Sinks/OutputChunkGenerator.cpp b/src/Processors/Sinks/OutputChunkGenerator.cpp
new file mode 100644
index 00000000000..dab5adb1792
--- /dev/null
+++ b/src/Processors/Sinks/OutputChunkGenerator.cpp
@@ -0,0 +1,80 @@
+#include <Processors/Sinks/IOutputChunkGenerator.h>
+
+namespace DB {
+
+class ForwardEverythingGenerator : public IOutputChunkGenerator {
+public:
+
+    explicit ForwardEverythingGenerator() = default;
+
+    void onNewChunkArrived(Chunk chunk) override
+    {
+        in_chunk = chunk.clone();
+    }
+
+    void onRowsProcessed(size_t /*row_count*/, bool /*append*/)  override
+    {}
+
+    Chunk generateChunk()  override
+    {
+        return std::move(in_chunk);
+    }
+
+private:
+    Chunk in_chunk;
+};
+
+class CopyRangesGenerator : public IOutputChunkGenerator {
+public:
+    explicit CopyRangesGenerator() = default;
+
+    void onNewChunkArrived(Chunk chunk) override
+    {
+        out_cols = chunk.cloneEmptyColumns();
+        in_chunk = std::move(chunk);
+    }
+
+    void onRowsProcessed(size_t row_count, bool append)  override
+    {
+        if (append)
+        {
+            const Columns& in_cols = in_chunk.getColumns();
+            for (size_t i = 0; i < out_cols.size(); i++)
+            {
+                out_cols[i]->insertRangeFrom(*(in_cols[i]), row_offset, row_count);
+            }
+            final_chunk_rows += row_count;
+        }
+
+        row_offset += row_count;
+    }
+
+    Chunk generateChunk()  override
+    {
+        return Chunk(std::move(out_cols), final_chunk_rows);
+    }
+
+private:
+    Chunk in_chunk;
+    MutableColumns out_cols;
+    size_t row_offset = 0;
+    size_t final_chunk_rows = 0;
+};
+
+std::unique_ptr<IOutputChunkGenerator> IOutputChunkGenerator::createCopyRanges(ContextPtr context)
+{
+    // If MV is responsible for deduplication, block is not considered duplicated.
+    if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
+    {
+        return createDefault();
+    }
+
+    return std::make_unique<CopyRangesGenerator>();
+}
+
+std::unique_ptr<IOutputChunkGenerator> IOutputChunkGenerator::createDefault()
+{
+    return std::make_unique<ForwardEverythingGenerator>();
+}
+
+}
diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp
index 5f9f9f9b1a1..84743306446 100644
--- a/src/Processors/Sinks/SinkToStorage.cpp
+++ b/src/Processors/Sinks/SinkToStorage.cpp
@@ -4,7 +4,12 @@
 namespace DB
 {
 
-SinkToStorage::SinkToStorage(const Block & header) : ExceptionKeepingTransform(header, header, false) {}
+SinkToStorage::SinkToStorage(const Block & header) : SinkToStorage(header, IOutputChunkGenerator::createDefault()) {}
+
+SinkToStorage::SinkToStorage(const Block & header, std::unique_ptr<IOutputChunkGenerator> output_generator_)
+    : ExceptionKeepingTransform(header, header, false),
+    output_generator(std::move(output_generator_))
+{ }
 
 void SinkToStorage::onConsume(Chunk chunk)
 {
@@ -15,15 +20,15 @@ void SinkToStorage::onConsume(Chunk chunk)
       */
     Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns()));
 
+    output_generator->onNewChunkArrived(chunk.clone());
     consume(chunk.clone());
-    if (!lastBlockIsDuplicate())
-        cur_chunk = std::move(chunk);
 }
 
 SinkToStorage::GenerateResult SinkToStorage::onGenerate()
 {
     GenerateResult res;
-    res.chunk = std::move(cur_chunk);
+
+    res.chunk = output_generator->generateChunk();
     res.is_done = true;
     return res;
 }
diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h
index 023bbd8b094..e810578f651 100644
--- a/src/Processors/Sinks/SinkToStorage.h
+++ b/src/Processors/Sinks/SinkToStorage.h
@@ -1,6 +1,7 @@
 #pragma once
 #include <Storages/TableLockHolder.h>
 #include <Processors/Transforms/ExceptionKeepingTransform.h>
+#include <Processors/Sinks/IOutputChunkGenerator.h>
 
 namespace DB
 {
@@ -13,13 +14,15 @@ friend class PartitionedSink;
 
 public:
     explicit SinkToStorage(const Block & header);
+    explicit SinkToStorage(const Block & header, std::unique_ptr<IOutputChunkGenerator> output_chunk_generator);
 
     const Block & getHeader() const { return inputs.front().getHeader(); }
     void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }
 
 protected:
     virtual void consume(Chunk chunk) = 0;
-    virtual bool lastBlockIsDuplicate() const { return false; }
+
+    IOutputChunkGenerator& getOutputGenerator() { return *output_generator; }
 
 private:
     std::vector<TableLockHolder> table_locks;
@@ -27,7 +30,7 @@ private:
     void onConsume(Chunk chunk) override;
     GenerateResult onGenerate() override;
 
-    Chunk cur_chunk;
+    std::unique_ptr<IOutputChunkGenerator> output_generator;
 };
 
 using SinkToStoragePtr = std::shared_ptr<SinkToStorage>;
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 75679a5750a..9fc7492a7fe 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -130,7 +130,7 @@ ReplicatedMergeTreeSinkImpl<async_insert>::ReplicatedMergeTreeSinkImpl(
     bool majority_quorum,
     ContextPtr context_,
     bool is_attach_)
-    : SinkToStorage(metadata_snapshot_->getSampleBlock())
+    : SinkToStorage(metadata_snapshot_->getSampleBlock(), IOutputChunkGenerator::createCopyRanges(context_))
     , storage(storage_)
     , metadata_snapshot(metadata_snapshot_)
     , required_quorum_size(majority_quorum ? std::nullopt : std::make_optional<size_t>(quorum_size))
@@ -386,13 +386,7 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
     finishDelayedChunk(zookeeper);
     delayed_chunk = std::make_unique<ReplicatedMergeTreeSinkImpl::DelayedChunk>();
     delayed_chunk->partitions = std::move(partitions);
-
-    /// If deduplicated data should not be inserted into MV, we need to set proper
-    /// value for `last_block_is_duplicate`, which is possible only after the part is committed.
-    /// Othervide we can delay commit.
-    /// TODO: we can also delay commit if there is no MVs.
-    if (!settings.deduplicate_blocks_in_dependent_materialized_views)
-        finishDelayedChunk(zookeeper);
+    finishDelayedChunk(zookeeper);
 
     ++num_blocks_processed;
 }
@@ -403,8 +397,6 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
     if (!delayed_chunk)
         return;
 
-    last_block_is_duplicate = false;
-
     for (auto & partition : delayed_chunk->partitions)
     {
         ProfileEventsScope scoped_attach(&partition.part_counters);
@@ -415,9 +407,10 @@ void ReplicatedMergeTreeSinkImpl<false>::finishDelayedChunk(const ZooKeeperWithF
 
         try
         {
-            bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num, false).second;
+            const size_t rowsCount = partition.temp_part.part->rows_count;
+            const bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num, false).second;
 
-            last_block_is_duplicate = last_block_is_duplicate || deduplicated;
+            getOutputGenerator().onRowsProcessed(rowsCount, !deduplicated);
 
             /// Set a special error code if the block is duplicate
             int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0;
@@ -1092,13 +1085,6 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::onStart()
     storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, true);
 }
 
-template<bool async_insert>
-void ReplicatedMergeTreeSinkImpl<async_insert>::onFinish()
-{
-    auto zookeeper = storage.getZooKeeper();
-    finishDelayedChunk(std::make_shared<ZooKeeperWithFaultInjection>(zookeeper));
-}
-
 template<bool async_insert>
 void ReplicatedMergeTreeSinkImpl<async_insert>::waitForQuorum(
     const ZooKeeperWithFaultInjectionPtr & zookeeper,
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
index 4a192a822f5..b208154631c 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
@@ -51,23 +51,12 @@ public:
 
     void onStart() override;
     void consume(Chunk chunk) override;
-    void onFinish() override;
 
     String getName() const override { return "ReplicatedMergeTreeSink"; }
 
     /// For ATTACHing existing data on filesystem.
     bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part);
 
-    /// For proper deduplication in MaterializedViews
-    bool lastBlockIsDuplicate() const override
-    {
-        /// If MV is responsible for deduplication, block is not considered duplicating.
-        if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
-            return false;
-
-        return last_block_is_duplicate;
-    }
-
     struct DelayedChunk;
 private:
     using BlockIDsType = std::conditional_t<async_insert, std::vector<String>, String>;
@@ -122,7 +111,6 @@ private:
     bool is_attach = false;
     bool quorum_parallel = false;
     const bool deduplicate = true;
-    bool last_block_is_duplicate = false;
     UInt64 num_blocks_processed = 0;
 
     using Logger = Poco::Logger;
diff --git a/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.reference b/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.reference
new file mode 100644
index 00000000000..325f639813a
--- /dev/null
+++ b/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.reference
@@ -0,0 +1,14 @@
+Initial
+2020-01-01 13:00:00	24
+Last block is duplicate
+2020-01-01 13:00:00	24
+2021-09-01 11:00:00	24
+One block is duplicate (default setting)
+2020-01-01 13:00:00	24
+2021-09-01 11:00:00	24
+2022-01-01 12:00:00	24
+One block is duplicate (changed setting)
+2020-01-01 13:00:00	24
+2021-09-01 11:00:00	24
+2022-01-01 12:00:00	24
+2023-01-01 12:00:00	24
diff --git a/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.sql b/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.sql
new file mode 100644
index 00000000000..c087e826a13
--- /dev/null
+++ b/tests/queries/0_stateless/02869_insert_materialized_views_duplicated_parts.sql
@@ -0,0 +1,44 @@
+-- Tags: zookeeper
+
+DROP TABLE IF EXISTS landing SYNC;
+DROP TABLE IF EXISTS mv SYNC;
+
+CREATE TABLE landing
+(
+    `time` DateTime,
+    `number` Int64
+)
+ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/landing/', 'r1')
+PARTITION BY toYYYYMMDD(time)
+ORDER BY time;
+
+CREATE MATERIALIZED VIEW mv
+ENGINE = ReplicatedSummingMergeTree('/clickhouse/{database}/tables/mv', 'r1')
+PARTITION BY toYYYYMMDD(hour) ORDER BY hour
+AS SELECT
+       toStartOfHour(time) AS hour,
+       sum(number) AS sum_amount
+   FROM landing GROUP BY hour;
+
+SELECT 'Initial';
+INSERT INTO landing VALUES ('2020-01-01 13:23:34', 24);
+SELECT * FROM mv ORDER BY hour;
+
+SELECT 'Last block is duplicate';
+INSERT INTO landing VALUES ('2021-09-01 11:00:00', 24), ('2020-01-01 13:23:34', 24);
+SELECT * FROM mv ORDER BY hour;
+
+SELECT 'One block is duplicate (default setting)';
+SET max_insert_delayed_streams_for_parallel_write = 0;
+INSERT INTO landing VALUES ('2021-09-01 11:00:00', 24), ('2022-01-01 12:03:00', 24);
+SELECT * FROM mv ORDER BY hour;
+
+SELECT 'One block is duplicate (changed setting)';
+SET max_insert_delayed_streams_for_parallel_write = 5;
+INSERT INTO landing VALUES ('2021-09-01 11:00:00', 24), ('2023-01-01 12:03:00', 24);
+
+SELECT * FROM mv ORDER BY hour;
+
+DROP TABLE mv;
+DROP TABLE landing;
+

From 6d27ea2742a162351cf1f1738523949a6e7d4ce1 Mon Sep 17 00:00:00 2001
From: priera <pedro.riera.marto@gmail.com>
Date: Tue, 12 Sep 2023 12:49:43 +0200
Subject: [PATCH 111/243] fixed style

---
 src/Processors/Sinks/IOutputChunkGenerator.h  | 3 ++-
 src/Processors/Sinks/OutputChunkGenerator.cpp | 9 ++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Sinks/IOutputChunkGenerator.h b/src/Processors/Sinks/IOutputChunkGenerator.h
index 3a92368df14..b5e1d76af26 100644
--- a/src/Processors/Sinks/IOutputChunkGenerator.h
+++ b/src/Processors/Sinks/IOutputChunkGenerator.h
@@ -6,7 +6,8 @@
 namespace DB
 {
 
-class IOutputChunkGenerator {
+class IOutputChunkGenerator
+{
 public:
     static std::unique_ptr<IOutputChunkGenerator> createCopyRanges(ContextPtr context);
     static std::unique_ptr<IOutputChunkGenerator> createDefault();
diff --git a/src/Processors/Sinks/OutputChunkGenerator.cpp b/src/Processors/Sinks/OutputChunkGenerator.cpp
index dab5adb1792..eb161c0be1e 100644
--- a/src/Processors/Sinks/OutputChunkGenerator.cpp
+++ b/src/Processors/Sinks/OutputChunkGenerator.cpp
@@ -1,8 +1,10 @@
 #include <Processors/Sinks/IOutputChunkGenerator.h>
 
-namespace DB {
+namespace DB
+{
 
-class ForwardEverythingGenerator : public IOutputChunkGenerator {
+class ForwardEverythingGenerator : public IOutputChunkGenerator
+{
 public:
 
     explicit ForwardEverythingGenerator() = default;
@@ -24,7 +26,8 @@ private:
     Chunk in_chunk;
 };
 
-class CopyRangesGenerator : public IOutputChunkGenerator {
+class CopyRangesGenerator : public IOutputChunkGenerator
+{
 public:
     explicit CopyRangesGenerator() = default;
 

From b30e33580b54349952b7f52813a3011efbe509a9 Mon Sep 17 00:00:00 2001
From: priera <pedro.riera.marto@gmail.com>
Date: Tue, 12 Sep 2023 17:12:43 +0200
Subject: [PATCH 112/243] fix test

---
 src/Processors/Sinks/OutputChunkGenerator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Processors/Sinks/OutputChunkGenerator.cpp b/src/Processors/Sinks/OutputChunkGenerator.cpp
index eb161c0be1e..107c21b2319 100644
--- a/src/Processors/Sinks/OutputChunkGenerator.cpp
+++ b/src/Processors/Sinks/OutputChunkGenerator.cpp
@@ -35,6 +35,8 @@ public:
     {
         out_cols = chunk.cloneEmptyColumns();
         in_chunk = std::move(chunk);
+        row_offset = 0;
+        final_chunk_rows = 0;
     }
 
     void onRowsProcessed(size_t row_count, bool append)  override

From 663ce74609170031d22a330c3478569f7723fea0 Mon Sep 17 00:00:00 2001
From: priera <pedro.riera.marto@gmail.com>
Date: Wed, 13 Sep 2023 14:39:29 +0200
Subject: [PATCH 113/243] fixed clang-tidy error

---
 src/Processors/Sinks/SinkToStorage.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h
index e810578f651..dc7ba23b52a 100644
--- a/src/Processors/Sinks/SinkToStorage.h
+++ b/src/Processors/Sinks/SinkToStorage.h
@@ -14,7 +14,7 @@ friend class PartitionedSink;
 
 public:
     explicit SinkToStorage(const Block & header);
-    explicit SinkToStorage(const Block & header, std::unique_ptr<IOutputChunkGenerator> output_chunk_generator);
+    explicit SinkToStorage(const Block & header, std::unique_ptr<IOutputChunkGenerator> output_generator_);
 
     const Block & getHeader() const { return inputs.front().getHeader(); }
     void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); }

From b92e37d2dfe0c2088429e1c90fbe73e9f384aa3b Mon Sep 17 00:00:00 2001
From: priera <pedro.riera.marto@gmail.com>
Date: Tue, 19 Sep 2023 12:13:39 +0200
Subject: [PATCH 114/243] Fixing PR comments

---
 src/Processors/Sinks/IOutputChunkGenerator.h  |  5 ++++-
 src/Processors/Sinks/OutputChunkGenerator.cpp | 20 ++++++++++++-------
 .../MergeTree/ReplicatedMergeTreeSink.cpp     |  3 ++-
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/Processors/Sinks/IOutputChunkGenerator.h b/src/Processors/Sinks/IOutputChunkGenerator.h
index b5e1d76af26..824313a2394 100644
--- a/src/Processors/Sinks/IOutputChunkGenerator.h
+++ b/src/Processors/Sinks/IOutputChunkGenerator.h
@@ -6,10 +6,13 @@
 namespace DB
 {
 
+/// This interface is meant to be used by the SinkToStorage processor
+/// SinkToStorage delegates on it the creation of the data chunk that will deliver to the next stages of the query pipeline
+/// Default implementation (createDefault() factory method) just forwards everything that it receives
 class IOutputChunkGenerator
 {
 public:
-    static std::unique_ptr<IOutputChunkGenerator> createCopyRanges(ContextPtr context);
+    static std::unique_ptr<IOutputChunkGenerator> createCopyRanges(bool deduplicate_later);
     static std::unique_ptr<IOutputChunkGenerator> createDefault();
 
     virtual ~IOutputChunkGenerator() = default;
diff --git a/src/Processors/Sinks/OutputChunkGenerator.cpp b/src/Processors/Sinks/OutputChunkGenerator.cpp
index 107c21b2319..942bf49a2d4 100644
--- a/src/Processors/Sinks/OutputChunkGenerator.cpp
+++ b/src/Processors/Sinks/OutputChunkGenerator.cpp
@@ -3,6 +3,7 @@
 namespace DB
 {
 
+/// Default implementation. The new chunk received is forwarded as-is to the next stages of the query
 class ForwardEverythingGenerator : public IOutputChunkGenerator
 {
 public:
@@ -14,10 +15,10 @@ public:
         in_chunk = chunk.clone();
     }
 
-    void onRowsProcessed(size_t /*row_count*/, bool /*append*/)  override
+    void onRowsProcessed(size_t /*row_count*/, bool /*append*/) override
     {}
 
-    Chunk generateChunk()  override
+    Chunk generateChunk() override
     {
         return std::move(in_chunk);
     }
@@ -26,6 +27,10 @@ private:
     Chunk in_chunk;
 };
 
+/// Specific implementation which generates a chunk with just a subset of the rows received originally
+/// Rows are assumed to be processed in the same order than they appear in the original chunk
+/// Is up to the client to decide how many rows process at once, but after each range processed,
+/// onRowsProcessed() has to be called, indicating whether append that range to the output chunk or not
 class CopyRangesGenerator : public IOutputChunkGenerator
 {
 public:
@@ -39,7 +44,7 @@ public:
         final_chunk_rows = 0;
     }
 
-    void onRowsProcessed(size_t row_count, bool append)  override
+    void onRowsProcessed(size_t row_count, bool append) override
     {
         if (append)
         {
@@ -54,7 +59,7 @@ public:
         row_offset += row_count;
     }
 
-    Chunk generateChunk()  override
+    Chunk generateChunk() override
     {
         return Chunk(std::move(out_cols), final_chunk_rows);
     }
@@ -66,10 +71,11 @@ private:
     size_t final_chunk_rows = 0;
 };
 
-std::unique_ptr<IOutputChunkGenerator> IOutputChunkGenerator::createCopyRanges(ContextPtr context)
+std::unique_ptr<IOutputChunkGenerator> IOutputChunkGenerator::createCopyRanges(bool deduplicate_later)
 {
-    // If MV is responsible for deduplication, block is not considered duplicated.
-    if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views)
+    // If MV is responsible for deduplication, block won't be considered duplicated.
+    // So default implementation, forwarding all the data, is used
+    if (deduplicate_later)
     {
         return createDefault();
     }
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 9fc7492a7fe..9cc1c6932c7 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -130,7 +130,8 @@ ReplicatedMergeTreeSinkImpl<async_insert>::ReplicatedMergeTreeSinkImpl(
     bool majority_quorum,
     ContextPtr context_,
     bool is_attach_)
-    : SinkToStorage(metadata_snapshot_->getSampleBlock(), IOutputChunkGenerator::createCopyRanges(context_))
+    : SinkToStorage(metadata_snapshot_->getSampleBlock(),
+                    IOutputChunkGenerator::createCopyRanges(context_->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views))
     , storage(storage_)
     , metadata_snapshot(metadata_snapshot_)
     , required_quorum_size(majority_quorum ? std::nullopt : std::make_optional<size_t>(quorum_size))

From 363ee7cae58c4118452bd856406d55469dae9e94 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 19 Sep 2023 13:41:55 +0200
Subject: [PATCH 115/243] Better check for exceptions in checkData of non
 replicated merge tree

---
 src/Storages/MergeTree/checkDataPart.cpp | 46 ++++++++++++++++++++++++
 src/Storages/MergeTree/checkDataPart.h   |  2 +-
 src/Storages/StorageMergeTree.cpp        | 14 +++++---
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index 1717d91271a..6d8e02701fa 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -15,6 +15,11 @@
 #include <IO/HashingReadBuffer.h>
 #include <IO/S3Common.h>
 #include <Common/CurrentMetrics.h>
+#include <Poco/Net/NetException.h>
+
+#if USE_AZURE_BLOB_STORAGE
+#    include <azure/core/http/http.hpp>
+#endif
 
 namespace CurrentMetrics
 {
@@ -68,6 +73,47 @@ bool isRetryableException(const Exception & e)
     return false;
 }
 
+bool isRetryableException(const std::exception_ptr exception_ptr)
+{
+    try
+    {
+        rethrow_exception(exception_ptr);
+    }
+#if USE_AWS_S3
+    catch (const S3Exception & s3_exception)
+    {
+        if (s3_exception.isRetryableError())
+            return true;
+    }
+#endif
+#if USE_AZURE_BLOB_STORAGE
+    catch (const Azure::Core::RequestFailedException &)
+    {
+        return true;
+    }
+#endif
+    catch (const Exception & e)
+    {
+        if (isNotEnoughMemoryErrorCode(e.code()))
+            return true;
+
+        if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT)
+            return true;
+    }
+    catch (const Poco::Net::NetException &)
+    {
+        return true;
+    }
+    catch (const Poco::TimeoutException &)
+    {
+        return true;
+    }
+
+    /// In fact, there can be other similar situations.
+    /// But it is OK, because there is a safety guard against deleting too many parts.
+    return false;
+}
+
 
 static IMergeTreeDataPart::Checksums checkDataPart(
     MergeTreeData::DataPartPtr data_part,
diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h
index 20ddecad3ed..88e86db37fc 100644
--- a/src/Storages/MergeTree/checkDataPart.h
+++ b/src/Storages/MergeTree/checkDataPart.h
@@ -14,5 +14,5 @@ IMergeTreeDataPart::Checksums checkDataPart(
 
 bool isNotEnoughMemoryErrorCode(int code);
 bool isRetryableException(const Exception & e);
-
+bool isRetryableException(const std::exception_ptr exception_ptr);
 }
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 4f27cbc76ed..b2e6e6d33fa 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -2222,10 +2222,13 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
 
                 results.emplace_back(part->name, true, "Checksums recounted and written to disk.");
             }
-            catch (const Exception & ex)
+            catch (...)
             {
+                if (isRetryableException(std::current_exception()))
+                    throw;
+
                 tryLogCurrentException(log, __PRETTY_FUNCTION__);
-                results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'");
+                results.emplace_back(part->name, false, "Check of part finished with error: '" + getCurrentExceptionMessage(false) + "'");
             }
         }
         else
@@ -2235,9 +2238,12 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
                 checkDataPart(part, true);
                 results.emplace_back(part->name, true, "");
             }
-            catch (const Exception & ex)
+            catch (...)
             {
-                results.emplace_back(part->name, false, ex.message());
+                if (isRetryableException(std::current_exception()))
+                    throw;
+
+                results.emplace_back(part->name, false, getCurrentExceptionMessage(false));
             }
         }
     }

From 7b1cf9d912c00eb26f9d368f2fcc9742a74249eb Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 19 Sep 2023 13:45:34 +0200
Subject: [PATCH 116/243] Revert "Better check for exceptions in checkData of
 non replicated merge tree"

This reverts commit 363ee7cae58c4118452bd856406d55469dae9e94.
---
 src/Storages/MergeTree/checkDataPart.cpp | 46 ------------------------
 src/Storages/MergeTree/checkDataPart.h   |  2 +-
 src/Storages/StorageMergeTree.cpp        | 14 +++-----
 3 files changed, 5 insertions(+), 57 deletions(-)

diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index 6d8e02701fa..1717d91271a 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -15,11 +15,6 @@
 #include <IO/HashingReadBuffer.h>
 #include <IO/S3Common.h>
 #include <Common/CurrentMetrics.h>
-#include <Poco/Net/NetException.h>
-
-#if USE_AZURE_BLOB_STORAGE
-#    include <azure/core/http/http.hpp>
-#endif
 
 namespace CurrentMetrics
 {
@@ -73,47 +68,6 @@ bool isRetryableException(const Exception & e)
     return false;
 }
 
-bool isRetryableException(const std::exception_ptr exception_ptr)
-{
-    try
-    {
-        rethrow_exception(exception_ptr);
-    }
-#if USE_AWS_S3
-    catch (const S3Exception & s3_exception)
-    {
-        if (s3_exception.isRetryableError())
-            return true;
-    }
-#endif
-#if USE_AZURE_BLOB_STORAGE
-    catch (const Azure::Core::RequestFailedException &)
-    {
-        return true;
-    }
-#endif
-    catch (const Exception & e)
-    {
-        if (isNotEnoughMemoryErrorCode(e.code()))
-            return true;
-
-        if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT)
-            return true;
-    }
-    catch (const Poco::Net::NetException &)
-    {
-        return true;
-    }
-    catch (const Poco::TimeoutException &)
-    {
-        return true;
-    }
-
-    /// In fact, there can be other similar situations.
-    /// But it is OK, because there is a safety guard against deleting too many parts.
-    return false;
-}
-
 
 static IMergeTreeDataPart::Checksums checkDataPart(
     MergeTreeData::DataPartPtr data_part,
diff --git a/src/Storages/MergeTree/checkDataPart.h b/src/Storages/MergeTree/checkDataPart.h
index 88e86db37fc..20ddecad3ed 100644
--- a/src/Storages/MergeTree/checkDataPart.h
+++ b/src/Storages/MergeTree/checkDataPart.h
@@ -14,5 +14,5 @@ IMergeTreeDataPart::Checksums checkDataPart(
 
 bool isNotEnoughMemoryErrorCode(int code);
 bool isRetryableException(const Exception & e);
-bool isRetryableException(const std::exception_ptr exception_ptr);
+
 }
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index b2e6e6d33fa..4f27cbc76ed 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -2222,13 +2222,10 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
 
                 results.emplace_back(part->name, true, "Checksums recounted and written to disk.");
             }
-            catch (...)
+            catch (const Exception & ex)
             {
-                if (isRetryableException(std::current_exception()))
-                    throw;
-
                 tryLogCurrentException(log, __PRETTY_FUNCTION__);
-                results.emplace_back(part->name, false, "Check of part finished with error: '" + getCurrentExceptionMessage(false) + "'");
+                results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'");
             }
         }
         else
@@ -2238,12 +2235,9 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
                 checkDataPart(part, true);
                 results.emplace_back(part->name, true, "");
             }
-            catch (...)
+            catch (const Exception & ex)
             {
-                if (isRetryableException(std::current_exception()))
-                    throw;
-
-                results.emplace_back(part->name, false, getCurrentExceptionMessage(false));
+                results.emplace_back(part->name, false, ex.message());
             }
         }
     }

From 268732742b5f4ed97b80699218cd460db838112d Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 18 Sep 2023 16:44:59 +0000
Subject: [PATCH 117/243] Don't use default move assignment in TimerDescriptor

---
 src/Common/TimerDescriptor.cpp | 6 ++++++
 src/Common/TimerDescriptor.h   | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp
index 1f07f548d85..2fb9618b60a 100644
--- a/src/Common/TimerDescriptor.cpp
+++ b/src/Common/TimerDescriptor.cpp
@@ -32,6 +32,12 @@ TimerDescriptor::TimerDescriptor(TimerDescriptor && other) noexcept : timer_fd(o
     other.timer_fd = -1;
 }
 
+TimerDescriptor & TimerDescriptor::operator=(DB::TimerDescriptor && other) noexcept
+{
+    std::swap(timer_fd, other.timer_fd);
+    return *this;
+}
+
 TimerDescriptor::~TimerDescriptor()
 {
     /// Do not check for result cause cannot throw exception.
diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h
index 5263c05edea..0292f85d770 100644
--- a/src/Common/TimerDescriptor.h
+++ b/src/Common/TimerDescriptor.h
@@ -18,7 +18,7 @@ public:
     TimerDescriptor(const TimerDescriptor &) = delete;
     TimerDescriptor & operator=(const TimerDescriptor &) = delete;
     TimerDescriptor(TimerDescriptor && other) noexcept;
-    TimerDescriptor & operator=(TimerDescriptor &&) = default;
+    TimerDescriptor & operator=(TimerDescriptor &&) noexcept;
 
     int getDescriptor() const { return timer_fd; }
 

From 9dcc41cf83d71f014c7e612c9653c1c789ca6d34 Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 19 Sep 2023 13:47:32 +0200
Subject: [PATCH 118/243] Fxi

---
 src/Storages/StorageMergeTree.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index bae91ec8bb1..22700712829 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -2231,10 +2231,13 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
 
                 results.emplace_back(part->name, true, "Checksums recounted and written to disk.");
             }
-            catch (const Exception & ex)
+            catch (...)
             {
+                if (isRetryableException(std::current_exception()))
+                    throw;
+
                 tryLogCurrentException(log, __PRETTY_FUNCTION__);
-                results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'");
+                results.emplace_back(part->name, false, "Check of part finished with error: '" + getCurrentExceptionMessage(false) + "'");
             }
         }
         else
@@ -2244,9 +2247,12 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
                 checkDataPart(part, true);
                 results.emplace_back(part->name, true, "");
             }
-            catch (const Exception & ex)
+            catch (...)
             {
-                results.emplace_back(part->name, false, ex.message());
+                if (isRetryableException(std::current_exception()))
+                    throw;
+
+                results.emplace_back(part->name, false, getCurrentExceptionMessage(false));
             }
         }
     }

From 8c29408f5eba3388a094fa937d19c251a5770791 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 7 Sep 2023 14:06:19 +0000
Subject: [PATCH 119/243] Parse data in JSON format as JSONEachRow if failed to
 parse metadata

---
 src/Formats/JSONUtils.cpp                     | 17 ++--
 src/Formats/JSONUtils.h                       |  1 +
 .../Formats/Impl/JSONEachRowRowInputFormat.h  |  3 +-
 .../Formats/Impl/JSONRowInputFormat.cpp       | 82 ++++++++++++++-----
 .../Formats/Impl/JSONRowInputFormat.h         | 18 +++-
 ..._as_json_each_row_on_no_metadata.reference |  3 +
 ...e_json_as_json_each_row_on_no_metadata.sql |  3 +
 7 files changed, 98 insertions(+), 29 deletions(-)
 create mode 100644 tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.reference
 create mode 100644 tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.sql

diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp
index 6fbda869154..1a2849493c3 100644
--- a/src/Formats/JSONUtils.cpp
+++ b/src/Formats/JSONUtils.cpp
@@ -687,10 +687,9 @@ namespace JSONUtils
         return names_and_types;
     }
 
-    NamesAndTypesList readMetadataAndValidateHeader(ReadBuffer & in, const Block & header)
+    void validateMetadataByHeader(const NamesAndTypesList & names_and_types_from_metadata, const Block & header)
     {
-        auto names_and_types = JSONUtils::readMetadata(in);
-        for (const auto & [name, type] : names_and_types)
+        for (const auto & [name, type] : names_and_types_from_metadata)
         {
             if (!header.has(name))
                 continue;
@@ -698,10 +697,16 @@ namespace JSONUtils
             auto header_type = header.getByName(name).type;
             if (!type->equals(*header_type))
                 throw Exception(
-                                ErrorCodes::INCORRECT_DATA,
-                                "Type {} of column '{}' from metadata is not the same as type in header {}",
-                                type->getName(), name, header_type->getName());
+                    ErrorCodes::INCORRECT_DATA,
+                    "Type {} of column '{}' from metadata is not the same as type in header {}",
+                    type->getName(), name, header_type->getName());
         }
+    }
+
+    NamesAndTypesList readMetadataAndValidateHeader(ReadBuffer & in, const Block & header)
+    {
+        auto names_and_types = JSONUtils::readMetadata(in);
+        validateMetadataByHeader(names_and_types, header);
         return names_and_types;
     }
 
diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h
index bd56eb646cb..27a357447f5 100644
--- a/src/Formats/JSONUtils.h
+++ b/src/Formats/JSONUtils.h
@@ -124,6 +124,7 @@ namespace JSONUtils
 
     NamesAndTypesList readMetadata(ReadBuffer & in);
     NamesAndTypesList readMetadataAndValidateHeader(ReadBuffer & in, const Block & header);
+    void validateMetadataByHeader(const NamesAndTypesList & names_and_types_from_metadata, const Block & header);
 
     bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name);
     void skipTheRestOfObject(ReadBuffer & in);
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
index d4246c37ea0..ad494d07fbc 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@@ -32,10 +32,11 @@ public:
     String getName() const override { return "JSONEachRowRowInputFormat"; }
     void resetParser() override;
 
-private:
+protected:
     void readPrefix() override;
     void readSuffix() override;
 
+private:
     bool readRow(MutableColumns & columns, RowReadExtension & ext) override;
     bool allowSyncAfterError() const override { return true; }
     void syncAfterError() override;
diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
index ac7ba6048a5..a1507050fbc 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
@@ -12,42 +12,84 @@ namespace ErrorCodes
 }
 
 JSONRowInputFormat::JSONRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_)
-    : JSONEachRowRowInputFormat(in_, header_, params_, format_settings_, false), validate_types_from_metadata(format_settings_.json.validate_types_from_metadata)
+    : JSONRowInputFormat(std::make_unique<PeekableReadBuffer>(in_), header_, params_, format_settings_)
+{
+}
+
+JSONRowInputFormat::JSONRowInputFormat(std::unique_ptr<PeekableReadBuffer> buf, const DB::Block & header_, DB::IRowInputFormat::Params params_, const DB::FormatSettings & format_settings_)
+    : JSONEachRowRowInputFormat(*buf, header_, params_, format_settings_, false), validate_types_from_metadata(format_settings_.json.validate_types_from_metadata), peekable_buf(std::move(buf))
 {
 }
 
 void JSONRowInputFormat::readPrefix()
 {
-    skipBOMIfExists(*in);
-    JSONUtils::skipObjectStart(*in);
+    skipBOMIfExists(*peekable_buf);
+
+    PeekableReadBufferCheckpoint checkpoint(*peekable_buf);
+    NamesAndTypesList names_and_types_from_metadata;
+
+    /// Try to parse metadata, if failed, try to parse data as JSONEachRow format.
+    try
+    {
+        JSONUtils::skipObjectStart(*peekable_buf);
+        names_and_types_from_metadata = JSONUtils::readMetadata(*peekable_buf);
+        JSONUtils::skipComma(*peekable_buf);
+        if (!JSONUtils::skipUntilFieldInObject(*peekable_buf, "data"))
+            throw Exception(ErrorCodes::INCORRECT_DATA, "Expected field \"data\" with table content");
+
+        JSONUtils::skipArrayStart(*peekable_buf);
+        data_in_square_brackets = true;
+    }
+    catch (...)
+    {
+        peekable_buf->rollbackToCheckpoint();
+        JSONEachRowRowInputFormat::readPrefix();
+        parse_as_json_each_row = true;
+        return;
+    }
+
     if (validate_types_from_metadata)
-        JSONUtils::readMetadataAndValidateHeader(*in, getPort().getHeader());
-    else
-        JSONUtils::readMetadata(*in);
-
-    JSONUtils::skipComma(*in);
-    if (!JSONUtils::skipUntilFieldInObject(*in, "data"))
-        throw Exception(ErrorCodes::INCORRECT_DATA, "Expected field \"data\" with table content");
-
-    JSONUtils::skipArrayStart(*in);
-    data_in_square_brackets = true;
+        JSONUtils::validateMetadataByHeader(names_and_types_from_metadata, getPort().getHeader());
 }
 
 void JSONRowInputFormat::readSuffix()
 {
-    JSONUtils::skipArrayEnd(*in);
-    JSONUtils::skipTheRestOfObject(*in);
+    if (parse_as_json_each_row)
+    {
+        JSONEachRowRowInputFormat::readSuffix();
+    }
+    else
+    {
+        JSONUtils::skipArrayEnd(*peekable_buf);
+        JSONUtils::skipTheRestOfObject(*peekable_buf);
+    }
 }
 
-JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_) : ISchemaReader(in_)
+JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : JSONRowSchemaReader(std::make_unique<PeekableReadBuffer>(in_), format_settings_)
+{
+}
+
+JSONRowSchemaReader::JSONRowSchemaReader(std::unique_ptr<PeekableReadBuffer> buf, const DB::FormatSettings & format_settings_)
+    : JSONEachRowSchemaReader(*buf, format_settings_), peekable_buf(std::move(buf))
 {
 }
 
 NamesAndTypesList JSONRowSchemaReader::readSchema()
 {
-    skipBOMIfExists(in);
-    JSONUtils::skipObjectStart(in);
-    return JSONUtils::readMetadata(in);
+    skipBOMIfExists(*peekable_buf);
+    PeekableReadBufferCheckpoint checkpoint(*peekable_buf);
+    /// Try to parse metadata, if failed, try to parse data as JSONEachRow format
+    try
+    {
+        JSONUtils::skipObjectStart(*peekable_buf);
+        return JSONUtils::readMetadata(*peekable_buf);
+    }
+    catch (...)
+    {
+        peekable_buf->rollbackToCheckpoint(true);
+        return JSONEachRowSchemaReader::readSchema();
+    }
 }
 
 void registerInputFormatJSON(FormatFactory & factory)
@@ -69,7 +111,7 @@ void registerJSONSchemaReader(FormatFactory & factory)
     auto register_schema_reader = [&](const String & format)
     {
         factory.registerSchemaReader(
-            format, [](ReadBuffer & buf, const FormatSettings &) { return std::make_unique<JSONRowSchemaReader>(buf); });
+            format, [](ReadBuffer & buf, const FormatSettings & format_settings) { return std::make_unique<JSONRowSchemaReader>(buf, format_settings); });
     };
     register_schema_reader("JSON");
     /// JSONCompact has the same suffix with metadata.
diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.h b/src/Processors/Formats/Impl/JSONRowInputFormat.h
index 40b7cc2a268..5bdffe3dec0 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.h
@@ -24,20 +24,34 @@ public:
     String getName() const override { return "JSONRowInputFormat"; }
 
 private:
+    JSONRowInputFormat(
+        std::unique_ptr<PeekableReadBuffer> buf,
+        const Block & header_,
+        Params params_,
+        const FormatSettings & format_settings_);
+
     void readPrefix() override;
     void readSuffix() override;
 
     const bool validate_types_from_metadata;
+    bool parse_as_json_each_row = false;
+    std::unique_ptr<PeekableReadBuffer> peekable_buf;
+    std::exception_ptr reading_metadata_exception;
 };
 
-class JSONRowSchemaReader : public ISchemaReader
+class JSONRowSchemaReader : public JSONEachRowSchemaReader
 {
 public:
-    JSONRowSchemaReader(ReadBuffer & in_);
+    JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_);
 
     NamesAndTypesList readSchema() override;
 
     bool hasStrictOrderOfColumns() const override { return false; }
+
+private:
+    JSONRowSchemaReader(std::unique_ptr<PeekableReadBuffer> buf, const FormatSettings & format_settings_);
+
+    std::unique_ptr<PeekableReadBuffer> peekable_buf;
 };
 
 }
diff --git a/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.reference b/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.reference
new file mode 100644
index 00000000000..9c61868a7dd
--- /dev/null
+++ b/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.reference
@@ -0,0 +1,3 @@
+a	Nullable(Int64)					
+b	Nullable(String)					
+10	Hello
diff --git a/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.sql b/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.sql
new file mode 100644
index 00000000000..ba7b720e3b6
--- /dev/null
+++ b/tests/queries/0_stateless/02874_parse_json_as_json_each_row_on_no_metadata.sql
@@ -0,0 +1,3 @@
+desc format(JSON, '{"a" : 10, "b" : "Hello"}');
+select * from format(JSON, '{"a" : 10, "b" : "Hello"}');
+

From 5bd2e9f61043a30cb24f1f4221c50546d3426192 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 7 Sep 2023 16:57:25 +0000
Subject: [PATCH 120/243] Fix tests

---
 src/Processors/Formats/Impl/JSONRowInputFormat.cpp | 11 +++++++++++
 src/Processors/Formats/Impl/JSONRowInputFormat.h   |  3 +++
 2 files changed, 14 insertions(+)

diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
index a1507050fbc..745c510be1a 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
@@ -65,6 +65,17 @@ void JSONRowInputFormat::readSuffix()
     }
 }
 
+void JSONRowInputFormat::setReadBuffer(DB::ReadBuffer & in_)
+{
+    peekable_buf->setSubBuffer(in_);
+}
+
+void JSONRowInputFormat::resetParser()
+{
+    JSONEachRowRowInputFormat::resetParser();
+    peekable_buf->reset();
+}
+
 JSONRowSchemaReader::JSONRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
     : JSONRowSchemaReader(std::make_unique<PeekableReadBuffer>(in_), format_settings_)
 {
diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.h b/src/Processors/Formats/Impl/JSONRowInputFormat.h
index 5bdffe3dec0..771c0b64aee 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.h
@@ -23,6 +23,9 @@ public:
 
     String getName() const override { return "JSONRowInputFormat"; }
 
+    void setReadBuffer(ReadBuffer & in_) override;
+    void resetParser() override;
+
 private:
     JSONRowInputFormat(
         std::unique_ptr<PeekableReadBuffer> buf,

From f974970c3cb88474f053ce59808926e8c2f28a76 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Mon, 18 Sep 2023 18:41:04 +0000
Subject: [PATCH 121/243] Apply suggestion

---
 .../Formats/Impl/JSONRowInputFormat.cpp       | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
index 745c510be1a..90537061723 100644
--- a/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONRowInputFormat.cpp
@@ -40,16 +40,27 @@ void JSONRowInputFormat::readPrefix()
         JSONUtils::skipArrayStart(*peekable_buf);
         data_in_square_brackets = true;
     }
-    catch (...)
+    catch (const ParsingException &)
+    {
+        parse_as_json_each_row = true;
+    }
+    catch (const Exception & e)
+    {
+        if (e.code() != ErrorCodes::INCORRECT_DATA)
+            throw;
+
+        parse_as_json_each_row = true;
+    }
+
+    if (parse_as_json_each_row)
     {
         peekable_buf->rollbackToCheckpoint();
         JSONEachRowRowInputFormat::readPrefix();
-        parse_as_json_each_row = true;
-        return;
     }
-
-    if (validate_types_from_metadata)
+    else if (validate_types_from_metadata)
+    {
         JSONUtils::validateMetadataByHeader(names_and_types_from_metadata, getPort().getHeader());
+    }
 }
 
 void JSONRowInputFormat::readSuffix()

From e4e97275722ad9ac7733fea8b749b0f768b19d39 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Tue, 19 Sep 2023 14:14:17 +0200
Subject: [PATCH 122/243] Update information_schema docs

---
 .../system-tables/information_schema.md       | 38 +++++++++----------
 .../System/InformationSchema/tables.sql       | 10 ++---
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index b990a312019..d9fcb544ebb 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -229,22 +229,22 @@ is_trigger_insertable_into: NO
 
 ## KEY_COLUMN_USAGE (#key_column_usage)
 
-It was added for compatibility with third party tools such as Tableau Online. Contains only the primary keys columns read from [system.columns](../../operations/system-tables/columns.md).
+Contains columns from the [system.tables](../../operations/system-tables/tables.md) system table which are restricted by constraints.
 
 Columns:
 
-- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the constraint belongs. This value is always `def`.
+- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused. Always `def`.
 - `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the constraint belongs.
 - `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the constraint.
-- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the table belongs. This value is always `def`.
+- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused. Always `def`.
 - `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the table belongs.
 - `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table that has the constraint.
 - `column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column that has the constraint.
 - `ordinal_position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The column's position within the constraint, not the column's position within the table. Column positions are numbered beginning with 1.
-- `position_in_unique_constraint` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Always `NULL`.
-- `referenced_table_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the schema referenced by the constraint. Always `NULL`.
-- `referenced_table_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the table referenced by the constraint. Always `NULL`.
-- `referenced_column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column referenced by the constraint. Always `NULL`.
+- `position_in_unique_constraint` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Currently unused. Always `NULL`.
+- `referenced_table_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL.
+- `referenced_table_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL.
+- `referenced_column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL.
 
 **Example**
 
@@ -270,18 +270,18 @@ constraint_name:         PRIMARY
 
 ## REFERENTIAL_CONSTRAINTS (#referential_constraints)
 
-It was added for compatibility with third party tools such as Tableau Online. Reads no data by design, selects from this view will always yield an empty result set.
+Contains information about foreign keys. Currently returns an empty result (no rows) which is just enough to provide compatibility with 3rd party tools like Tableau Online.
 
 Columns:
 
-- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog to which the constraint belongs.
-- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the constraint belongs.
-- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the constraint.
-- `unique_constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the catalog containing the unique constraint that the constraint references.
-- `unique_constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema containing the unique constraint that the constraint references.
-- `unique_constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the unique constraint that the constraint references.
-- `match_option` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `MATCH` attribute.
-- `update_rule` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `ON UPDATE` attribute.
-- `delete_rule` ([String](../../sql-reference/data-types/string.md)) — The value of the constraint `ON DELETE` attribute.
-- `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table.
-- `referenced_table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table referenced by the constraint.
+- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
+- `unique_constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `unique_constraint_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `unique_constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
+- `match_option` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `update_rule` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `delete_rule` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `referenced_table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql
index 34dbd108342..d02963b0675 100644
--- a/src/Storages/System/InformationSchema/tables.sql
+++ b/src/Storages/System/InformationSchema/tables.sql
@@ -4,25 +4,25 @@ ATTACH VIEW tables
     `table_schema` String,
     `table_name` String,
     `table_type` String,
-    `table_comment` Nullable(String),
     `table_collation` Nullable(String),
+    `table_comment` Nullable(String),
     `TABLE_CATALOG` String ALIAS table_catalog,
     `TABLE_SCHEMA` String ALIAS table_schema,
     `TABLE_NAME` String ALIAS table_name,
     `TABLE_TYPE` String ALIAS table_type,
-    `TABLE_COMMENT` Nullable(String) ALIAS table_comment,
-    `TABLE_COLLATION` Nullable(String) ALIAS table_collation
+    `TABLE_COLLATION` Nullable(String) ALIAS table_collation,
+    `TABLE_COMMENT` Nullable(String) ALIAS table_comment
 ) AS
 SELECT
     database  AS table_catalog,
     database  AS table_schema,
     name      AS table_name,
-    comment   AS table_comment,
     multiIf(is_temporary,          'LOCAL TEMPORARY', 
             engine LIKE '%View',   'VIEW', 
             engine LIKE 'System%', 'SYSTEM VIEW', 
             has_own_data = 0,      'FOREIGN TABLE', 
             'BASE TABLE'
             ) AS table_type,
-    'utf8mb4' AS table_collation
+    'utf8mb4' AS table_collation,
+    comment   AS table_comment
 FROM system.tables

From 774c4b52dadbd0fbb2430d2abbf62c3b630204ef Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Mon, 18 Sep 2023 20:08:37 +0000
Subject: [PATCH 123/243] Rework

---
 docs/en/operations/settings/settings.md       |  11 +-
 .../functions/splitting-merging-functions.md  |  16 +-
 src/Core/Settings.h                           |   2 +-
 src/Functions/FunctionsStringArray.cpp        |  22 +-
 src/Functions/FunctionsStringArray.h          | 339 +++++-------------
 src/Functions/URL/URLHierarchy.cpp            |   2 +-
 src/Functions/URL/URLPathHierarchy.cpp        |   2 +-
 .../URL/extractURLParameterNames.cpp          |   2 +-
 src/Functions/URL/extractURLParameters.cpp    |   2 +-
 .../02475_split_with_max_substrings.reference | 204 ++++++++---
 .../02475_split_with_max_substrings.sql       | 226 +++++++++---
 ...6_splitby_max_substring_behavior.reference | 126 -------
 .../02876_splitby_max_substring_behavior.sql  | 151 --------
 13 files changed, 446 insertions(+), 659 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
 delete mode 100644 tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index ad1437ea3eb..ef4703e3bc3 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4067,17 +4067,16 @@ Result:
 └─────┴─────┴───────┘
 ```
 
-## splitby_max_substring_behavior {#splitby-max-substring-behavior}
+## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
 
-Controls how functions [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with given `max_substring` argument behave.
+Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
 
 Possible values:
 
-- `''` - If `max_substring` >=1, return the first `max_substring`-many splits.
-- `'python'` - If `max_substring` >= 0, split `max_substring`-many times, and return `max_substring + 1` elements where the last element contains the remaining string.
-- `'spark'` - If `max_substring` >= 1, split `max_substring`-many times, and return `max_substring + 1` elements where the last element contains the remaining string.
+- `0` - The remaining string will not be included in the last element of the result array.
+- `1` - The remaining string will be included in the last element of the result array. This is the behavior of Spark's [`split()`](https://spark.apache.org/docs/3.1.2/api/python/reference/api/pyspark.sql.functions.split.html) function and Python's ['string.split()'](https://docs.python.org/3/library/stdtypes.html#str.split) method.
 
-Default value: ``.
+Default value: `0`
 
 ## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
 
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 1e0bc3da664..614bf556c8e 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -21,7 +21,7 @@ splitByChar(separator, s[, max_substrings]))
 
 - `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
 - `s` — The string to split. [String](../../sql-reference/data-types/string.md).
-- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
+- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible.
 
 **Returned value(s)**
 
@@ -39,7 +39,9 @@ For example,
 - in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
 - in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
 
-The previous behavior can be restored by setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) = 'python'.
+A behavior similar to ClickHouse pre-v22.11 can be achieved by setting
+[splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string)
+`SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1 -- ['a', 'b=c=d']`
 :::
 
 **Example**
@@ -82,7 +84,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere
 - There are multiple consecutive non-empty separators;
 - The original string `s` is empty while the separator is not empty.
 
-Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
 
 **Example**
 
@@ -137,7 +139,7 @@ Returns an array of selected substrings. Empty substrings may be selected when:
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
 
 **Example**
 
@@ -188,7 +190,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
 
 **Example**
 
@@ -227,7 +229,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
 
 **Example**
 
@@ -289,7 +291,7 @@ Returns an array of selected substrings.
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
-Setting [splitby_max_substring_behavior](../../operations/settings/settings.md#splitby-max-substring-behavior) (default: '') controls the behavior with `max_substrings` > 0.
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
 
 **Example**
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ca8f82ed8b6..fe9f50baf20 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -502,7 +502,7 @@ class IColumn;
     M(Bool, reject_expensive_hyperscan_regexps, true, "Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion)", 0) \
     M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
     M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
-    M(String, splitby_max_substring_behavior, "", "Control the behavior of the 'max_substring' argument in functions splitBy*(): '' (default), 'python' or 'spark'", 0) \
+    M(Bool, splitby_max_substrings_includes_remaining_string, false, "Functions 'splitBy*()' with 'max_substrings' argument > 0 include the remaining string as last element in the result", 0) \
     \
     M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
     M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
diff --git a/src/Functions/FunctionsStringArray.cpp b/src/Functions/FunctionsStringArray.cpp
index 326651c111d..4afee55704f 100644
--- a/src/Functions/FunctionsStringArray.cpp
+++ b/src/Functions/FunctionsStringArray.cpp
@@ -19,7 +19,7 @@ std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument
     return static_cast<Int64>(value);
 }
 
-std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position, MaxSubstringBehavior max_substring_behavior)
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
 {
     if (max_substrings_argument_position >= arguments.size())
         return std::nullopt;
@@ -35,24 +35,8 @@ std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments,
             arguments[max_substrings_argument_position].column->getName(),
             max_substrings_argument_position + 1);
 
-    if (max_splits)
-        switch (max_substring_behavior)
-        {
-            case MaxSubstringBehavior::LikeClickHouse:
-            case MaxSubstringBehavior::LikeSpark:
-            {
-                if (*max_splits <= 0)
-                    return std::nullopt;
-                break;
-            }
-            case MaxSubstringBehavior::LikePython:
-            {
-                if (*max_splits < 0)
-                    return std::nullopt;
-                break;
-            }
-        }
-
+    if (*max_splits <= 0)
+        return std::nullopt;
 
     return max_splits;
 }
diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index e720fc96e52..d7d7e3b5100 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -54,14 +54,7 @@ namespace ErrorCodes
 
 using Pos = const char *;
 
-enum class MaxSubstringBehavior
-{
-    LikeClickHouse,
-    LikeSpark,
-    LikePython
-};
-
-std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position, MaxSubstringBehavior max_substring_behavior);
+std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
 
 /// Substring generators. All of them have a common interface.
 
@@ -72,7 +65,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = "alphaTokens";
@@ -97,10 +90,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 1);
     }
 
     /// Called for each next string.
@@ -125,35 +118,18 @@ public:
 
         if (max_splits)
         {
-            switch (max_substring_behavior)
+            if (max_substrings_includes_remaining_string)
             {
-                case MaxSubstringBehavior::LikeClickHouse:
+                if (splits == *max_splits - 1)
                 {
-                    if (splits == *max_splits)
-                        return false;
-                    break;
-                }
-                case MaxSubstringBehavior::LikeSpark:
-                {
-                    if (splits == *max_splits - 1)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
-                }
-                case MaxSubstringBehavior::LikePython:
-                {
-                    if (splits == *max_splits)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
+                    token_end = end;
+                    pos = end;
+                    return true;
                 }
             }
+            else
+                if (splits == *max_splits)
+                    return false;
         }
 
         while (pos < end && isAlphaASCII(*pos))
@@ -173,7 +149,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     /// Get the name of the function.
@@ -190,10 +166,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 1);
     }
 
     /// Called for each next string.
@@ -218,35 +194,18 @@ public:
 
         if (max_splits)
         {
-            switch (max_substring_behavior)
+            if (max_substrings_includes_remaining_string)
             {
-                case MaxSubstringBehavior::LikeClickHouse:
+                if (splits == *max_splits - 1)
                 {
-                    if (splits == *max_splits)
-                        return false;
-                    break;
-                }
-                case MaxSubstringBehavior::LikeSpark:
-                {
-                    if (splits == *max_splits - 1)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
-                }
-                case MaxSubstringBehavior::LikePython:
-                {
-                    if (splits == *max_splits)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
+                    token_end = end;
+                    pos = end;
+                    return true;
                 }
             }
+            else
+                if (splits == *max_splits)
+                    return false;
         }
 
         while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
@@ -266,7 +225,7 @@ private:
     Pos end;
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = "splitByWhitespace";
@@ -282,10 +241,10 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 1, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 1);
     }
 
     /// Called for each next string.
@@ -310,35 +269,18 @@ public:
 
         if (max_splits)
         {
-            switch (max_substring_behavior)
+            if (max_substrings_includes_remaining_string)
             {
-                case MaxSubstringBehavior::LikeClickHouse:
+                if (splits == *max_splits - 1)
                 {
-                    if (splits == *max_splits)
-                        return false;
-                    break;
-                }
-                case MaxSubstringBehavior::LikeSpark:
-                {
-                    if (splits == *max_splits - 1)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
-                }
-                case MaxSubstringBehavior::LikePython:
-                {
-                    if (splits == *max_splits)
-                    {
-                        token_end = end;
-                        pos = end;
-                        return true;
-                    }
-                    break;
+                    token_end = end;
+                    pos = end;
+                    return true;
                 }
             }
+            else
+                if (splits == *max_splits)
+                    return false;
         }
 
         while (pos < end && !isWhitespaceASCII(*pos))
@@ -359,7 +301,7 @@ private:
     char separator;
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = "splitByChar";
@@ -383,7 +325,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -398,8 +340,8 @@ public:
 
         separator = sep_str[0];
 
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     void set(Pos pos_, Pos end_)
@@ -418,35 +360,18 @@ public:
 
         if (max_splits)
         {
-            switch (max_substring_behavior)
+            if (max_substrings_includes_remaining_string)
             {
-                case MaxSubstringBehavior::LikeClickHouse:
+                if (splits == *max_splits - 1)
                 {
-                    if (splits == *max_splits)
-                        return false;
-                    break;
-                }
-                case MaxSubstringBehavior::LikeSpark:
-                {
-                    if (splits == *max_splits - 1)
-                    {
-                        token_end = end;
-                        pos = nullptr;
-                        return true;
-                    }
-                    break;
-                }
-                case MaxSubstringBehavior::LikePython:
-                {
-                    if (splits == *max_splits)
-                    {
-                        token_end = end;
-                        pos = nullptr;
-                        return true;
-                    }
-                    break;
+                    token_end = end;
+                    pos = nullptr;
+                    return true;
                 }
             }
+            else
+               if (splits == *max_splits)
+                   return false;
         }
 
         pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
@@ -472,7 +397,7 @@ private:
     String separator;
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = "splitByString";
@@ -487,7 +412,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -497,8 +422,8 @@ public:
 
         separator = col->getValue<String>();
 
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     /// Called for each next string.
@@ -521,35 +446,18 @@ public:
 
             if (max_splits)
             {
-                switch (max_substring_behavior)
+                if (max_substrings_includes_remaining_string)
                 {
-                    case MaxSubstringBehavior::LikeClickHouse:
+                    if (splits == *max_splits - 1)
                     {
-                        if (splits == *max_splits)
-                            return false;
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikeSpark:
-                    {
-                        if (splits == *max_splits - 1)
-                        {
-                            token_end = end;
-                            pos = end;
-                            return true;
-                        }
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikePython:
-                    {
-                        if (splits == *max_splits)
-                        {
-                            token_end = end;
-                            pos = end;
-                            return true;
-                        }
-                        break;
+                        token_end = end;
+                        pos = end;
+                        return true;
                     }
                 }
+                else
+                    if (splits == *max_splits)
+                        return false;
             }
 
             pos += 1;
@@ -565,35 +473,18 @@ public:
 
             if (max_splits)
             {
-                switch (max_substring_behavior)
+                if (max_substrings_includes_remaining_string)
                 {
-                    case MaxSubstringBehavior::LikeClickHouse:
+                    if (splits == *max_splits - 1)
                     {
-                        if (splits == *max_splits)
-                            return false;
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikeSpark:
-                    {
-                        if (splits == *max_splits - 1)
-                        {
-                            token_end = end;
-                            pos = nullptr;
-                            return true;
-                        }
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikePython:
-                    {
-                        if (splits == *max_splits)
-                        {
-                            token_end = end;
-                            pos = nullptr;
-                            return true;
-                        }
-                        break;
+                        token_end = end;
+                        pos = nullptr;
+                        return true;
                     }
                 }
+                else
+                    if (splits == *max_splits)
+                        return false;
             }
 
             pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
@@ -622,7 +513,7 @@ private:
 
     std::optional<size_t> max_splits;
     size_t splits;
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = "splitByRegexp";
@@ -638,7 +529,7 @@ public:
 
     static constexpr auto strings_argument_position = 1uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior max_substring_behavior_)
+    void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
 
@@ -649,8 +540,8 @@ public:
         if (!col->getValue<String>().empty())
             re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
 
-        max_substring_behavior = max_substring_behavior_;
-        max_splits = extractMaxSplits(arguments, 2, max_substring_behavior);
+        max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
+        max_splits = extractMaxSplits(arguments, 2);
     }
 
     /// Called for each next string.
@@ -673,35 +564,18 @@ public:
 
             if (max_splits)
             {
-                switch (max_substring_behavior)
+                if (max_substrings_includes_remaining_string)
                 {
-                    case MaxSubstringBehavior::LikeClickHouse:
+                    if (splits == *max_splits - 1)
                     {
-                        if (splits == *max_splits)
-                            return false;
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikeSpark:
-                    {
-                        if (splits == *max_splits - 1)
-                        {
-                            token_end = end;
-                            pos = end;
-                            return true;
-                        }
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikePython:
-                    {
-                        if (splits == *max_splits)
-                        {
-                            token_end = end;
-                            pos = end;
-                            return true;
-                        }
-                        break;
+                        token_end = end;
+                        pos = end;
+                        return true;
                     }
                 }
+                else
+                    if (splits == *max_splits)
+                        return false;
             }
 
             pos += 1;
@@ -717,35 +591,18 @@ public:
 
             if (max_splits)
             {
-                switch (max_substring_behavior)
+                if (max_substrings_includes_remaining_string)
                 {
-                    case MaxSubstringBehavior::LikeClickHouse:
+                    if (splits == *max_splits - 1)
                     {
-                        if (splits == *max_splits)
-                            return false;
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikeSpark:
-                    {
-                        if (splits == *max_splits - 1)
-                        {
-                            token_end = end;
-                            pos = nullptr;
-                            return true;
-                        }
-                        break;
-                    }
-                    case MaxSubstringBehavior::LikePython:
-                    {
-                        if (splits == *max_splits)
-                        {
-                            token_end = end;
-                            pos = nullptr;
-                            return true;
-                        }
-                        break;
+                        token_end = end;
+                        pos = nullptr;
+                        return true;
                     }
                 }
+                else
+                    if (splits == *max_splits)
+                        return false;
             }
 
             if (!re->match(pos, end - pos, matches) || !matches[0].length)
@@ -792,7 +649,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & arguments, MaxSubstringBehavior /*max_substring_behavior*/)
+    void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
     {
         const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
 
@@ -845,7 +702,7 @@ template <typename Generator>
 class FunctionTokens : public IFunction
 {
 private:
-    MaxSubstringBehavior max_substring_behavior;
+    bool max_substrings_includes_remaining_string;
 
 public:
     static constexpr auto name = Generator::name;
@@ -854,17 +711,7 @@ public:
     explicit FunctionTokens<Generator>(ContextPtr context)
     {
         const Settings & settings = context->getSettingsRef();
-        if (settings.splitby_max_substring_behavior.value == "")
-            max_substring_behavior = MaxSubstringBehavior::LikeClickHouse;
-        else if (settings.splitby_max_substring_behavior.value == "python")
-            max_substring_behavior = MaxSubstringBehavior::LikePython;
-        else if (settings.splitby_max_substring_behavior.value == "spark")
-            max_substring_behavior = MaxSubstringBehavior::LikeSpark;
-        else
-            throw Exception(
-                ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal value {} for setting splitby_max_substring_behavior in function {}, must be '', 'python' or 'spark'",
-                settings.splitby_max_substring_behavior.value, getName());
+        max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
     }
 
     String getName() const override { return name; }
@@ -885,7 +732,7 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
     {
         Generator generator;
-        generator.init(arguments, max_substring_behavior);
+        generator.init(arguments, max_substrings_includes_remaining_string);
 
         const auto & array_argument = arguments[generator.strings_argument_position];
 
diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp
index 260053dc401..96b64d3182b 100644
--- a/src/Functions/URL/URLHierarchy.cpp
+++ b/src/Functions/URL/URLHierarchy.cpp
@@ -30,7 +30,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp
index a11be358a70..7fd6601d780 100644
--- a/src/Functions/URL/URLPathHierarchy.cpp
+++ b/src/Functions/URL/URLPathHierarchy.cpp
@@ -29,7 +29,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substring_behavior*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp
index 2b79be07cae..b792d9140d6 100644
--- a/src/Functions/URL/extractURLParameterNames.cpp
+++ b/src/Functions/URL/extractURLParameterNames.cpp
@@ -29,7 +29,7 @@ public:
 
     static constexpr auto strings_argument_position = 0uz;
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
 
     /// Called for each next string.
     void set(Pos pos_, Pos end_)
diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp
index 271e5dc89c9..e1243d8fbcd 100644
--- a/src/Functions/URL/extractURLParameters.cpp
+++ b/src/Functions/URL/extractURLParameters.cpp
@@ -27,7 +27,7 @@ public:
         validateFunctionArgumentTypes(func, arguments, mandatory_args);
     }
 
-    void init(const ColumnsWithTypeAndName & /*arguments*/, MaxSubstringBehavior /*max_substring_behavior*/) {}
+    void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
 
     static constexpr auto strings_argument_position = 0uz;
 
diff --git a/tests/queries/0_stateless/02475_split_with_max_substrings.reference b/tests/queries/0_stateless/02475_split_with_max_substrings.reference
index d55ef45a5e0..904441f83fa 100644
--- a/tests/queries/0_stateless/02475_split_with_max_substrings.reference
+++ b/tests/queries/0_stateless/02475_split_with_max_substrings.reference
@@ -1,44 +1,160 @@
-['1','2','3']
-['1','2','3']
-['1','2','3']
-['1']
-['1','2']
-['1','2','3']
-['1','2','3']
-['one','two','three','']
-['one','two','three','']
-['one','two','three','']
-['one']
-['one','two']
-['one','two','three']
-['one','two','three','']
-['one','two','three','']
-['abca','abc']
-['abca','abc']
-['abca','abc']
-['abca']
-['abca','abc']
-['abca','abc']
-['abca','abc']
-['1','a','b']
-['1','a','b']
-['1','a','b']
-['1']
-['1','a']
-['1','a','b']
-['1','a','b']
-['1!','a,','b.']
-['1!','a,','b.']
-['1!','a,','b.']
-['1!']
-['1!','a,']
-['1!','a,','b.']
-['1!','a,','b.']
-['1','2 3','4,5','abcde']
-['1','2 3','4,5','abcde']
-['1','2 3','4,5','abcde']
-['1']
-['1','2 3']
-['1','2 3','4,5']
-['1','2 3','4,5','abcde']
-['1','2 3','4,5','abcde']
+-- negative tests
+-- splitByChar
+-- (default)
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a']
+['a','']
+['a','','b']
+['a','','b','c']
+['a','','b','c','d']
+['a','','b','c','d']
+-- (include remainder)
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a==b=c=d']
+['a','=b=c=d']
+['a','','b=c=d']
+['a','','b','c=d']
+['a','','b','c','d']
+['a','','b','c','d']
+-- splitByString
+-- (default)
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a']
+['a','=']
+['a','=','=']
+['a','=','=','b']
+['a','=','=','b','=']
+['a','=','=','b','=','c']
+['a','=','=','b','=','c','=']
+['a','=','=','b','=','c','=']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a']
+['a','']
+['a','','b']
+['a','','b','c']
+['a','','b','c','d']
+['a','','b','c','d']
+-- (include remainder)
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a==b=c=d']
+['a','==b=c=d']
+['a','=','=b=c=d']
+['a','=','=','b=c=d']
+['a','=','=','b','=c=d']
+['a','=','=','b','=','c=d']
+['a','=','=','b','=','c','=d']
+['a','=','=','b','=','c','=','d']
+['a','=','=','b','=','c','=','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a','','b','c','d']
+['a==b=c=d']
+['a','=b=c=d']
+['a','','b=c=d']
+['a','','b','c=d']
+['a','','b','c','d']
+['a','','b','c','d']
+-- splitByRegexp
+-- (default)
+['a','bc','de','f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a']
+['a','bc']
+['a','bc','de']
+['a','bc','de','f']
+['a','bc','de','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a']
+['a','1']
+['a','1','2']
+['a','1','2','b']
+['a','1','2','b','c']
+-- (include remainder)
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a','1','2','b','c','2','3','d','e','3','4','5','f']
+['a12bc23de345f']
+['a','12bc23de345f']
+['a','1','2bc23de345f']
+['a','1','2','bc23de345f']
+['a','1','2','b','c23de345f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a','bc','de','f']
+['a12bc23de345f']
+['a','bc23de345f']
+['a','bc','de345f']
+['a','bc','de','f']
+['a','bc','de','f']
+-- splitByAlpha
+-- (default)
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab']
+['ab','cd']
+['ab','cd','ef']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+-- (include remainder)
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+['ab.cd.ef.gh']
+['ab','cd.ef.gh']
+['ab','cd','ef.gh']
+['ab','cd','ef','gh']
+['ab','cd','ef','gh']
+-- splitByNonAlpha
+-- (default)
+['128','0','0','1']
+['128','0','0','1']
+['128','0','0','1']
+['128']
+['128','0']
+['128','0','0']
+['128','0','0','1']
+['128','0','0','1']
+-- (include remainder)
+['128','0','0','1']
+['128','0','0','1']
+['128','0','0','1']
+['128.0.0.1']
+['128','0.0.1']
+['128','0','0.1']
+['128','0','0','1']
+['128','0','0','1']
+-- splitByWhitespace
+-- (default)
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,']
+['Nein,','nein,']
+['Nein,','nein,','nein!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+-- (include remainder)
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein, nein, nein! Doch!']
+['Nein,','nein, nein! Doch!']
+['Nein,','nein,','nein! Doch!']
+['Nein,','nein,','nein!','Doch!']
+['Nein,','nein,','nein!','Doch!']
diff --git a/tests/queries/0_stateless/02475_split_with_max_substrings.sql b/tests/queries/0_stateless/02475_split_with_max_substrings.sql
index c51133c604e..3f367c75433 100644
--- a/tests/queries/0_stateless/02475_split_with_max_substrings.sql
+++ b/tests/queries/0_stateless/02475_split_with_max_substrings.sql
@@ -1,59 +1,175 @@
-select splitByChar(',', '1,2,3');
-select splitByChar(',', '1,2,3', -1);
-select splitByChar(',', '1,2,3', 0);
-select splitByChar(',', '1,2,3', 1);
-select splitByChar(',', '1,2,3', 2);
-select splitByChar(',', '1,2,3', 3);
-select splitByChar(',', '1,2,3', 4);
-
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC');
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', -1);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 0);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 1);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 2);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 3);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 4);
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 5);
-
-SELECT alphaTokens('abca1abc');
-SELECT alphaTokens('abca1abc', -1);
-SELECT alphaTokens('abca1abc', 0);
-SELECT alphaTokens('abca1abc', 1);
-SELECT alphaTokens('abca1abc', 2);
-SELECT alphaTokens('abca1abc', 3);
-
-SELECT splitByAlpha('abca1abc');
-
-SELECT splitByNonAlpha('  1!  a,  b.  ');
-SELECT splitByNonAlpha('  1!  a,  b.  ', -1);
-SELECT splitByNonAlpha('  1!  a,  b.  ',  0);
-SELECT splitByNonAlpha('  1!  a,  b.  ',  1);
-SELECT splitByNonAlpha('  1!  a,  b.  ',  2);
-SELECT splitByNonAlpha('  1!  a,  b.  ',  3);
-SELECT splitByNonAlpha('  1!  a,  b.  ',  4);
-
-SELECT splitByWhitespace('  1!  a,  b.  ');
-SELECT splitByWhitespace('  1!  a,  b.  ', -1);
-SELECT splitByWhitespace('  1!  a,  b.  ', 0);
-SELECT splitByWhitespace('  1!  a,  b.  ', 1);
-SELECT splitByWhitespace('  1!  a,  b.  ', 2);
-SELECT splitByWhitespace('  1!  a,  b.  ', 3);
-SELECT splitByWhitespace('  1!  a,  b.  ', 4);
-
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde');
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', -1);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 0);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 1);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 2);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 3);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 4);
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 5);
-
-
-select splitByChar(',', '1,2,3', ''); -- { serverError 43 }
-select splitByRegexp('[ABC]', 'oneAtwoBthreeC', ''); -- { serverError 43 }
+SELECT '-- negative tests';
+SELECT splitByChar(',', '1,2,3', ''); -- { serverError 43 }
+SELECT splitByRegexp('[ABC]', 'oneAtwoBthreeC', ''); -- { serverError 43 }
 SELECT alphaTokens('abca1abc', ''); -- { serverError 43 }
 SELECT splitByAlpha('abca1abc', ''); -- { serverError 43 }
 SELECT splitByNonAlpha('  1!  a,  b.  ',  ''); -- { serverError 43 }
 SELECT splitByWhitespace('  1!  a,  b.  ', ''); -- { serverError 43 }
-SELECT splitByString(', ', '1, 2 3, 4,5, abcde', ''); -- { serverError 43 }
\ No newline at end of file
+SELECT splitByString(', ', '1, 2 3, 4,5, abcde', ''); -- { serverError 43 }
+
+SELECT '-- splitByChar';
+SELECT '-- (default)';
+SELECT splitByChar('=', 'a==b=c=d');
+SELECT splitByChar('=', 'a==b=c=d', -1);
+SELECT splitByChar('=', 'a==b=c=d', 0);
+SELECT splitByChar('=', 'a==b=c=d', 1);
+SELECT splitByChar('=', 'a==b=c=d', 2);
+SELECT splitByChar('=', 'a==b=c=d', 3);
+SELECT splitByChar('=', 'a==b=c=d', 4);
+SELECT splitByChar('=', 'a==b=c=d', 5);
+SELECT splitByChar('=', 'a==b=c=d', 6);
+SELECT '-- (include remainder)';
+SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByChar('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+
+SELECT '-- splitByString';
+SELECT '-- (default)';
+SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 8) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('', 'a==b=c=d', 9) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT splitByString('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
+SELECT '-- (include remainder)';
+SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 8) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('', 'a==b=c=d', 9) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByString('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+
+
+SELECT '-- splitByRegexp';
+SELECT '-- (default)';
+SELECT splitByRegexp('\\d+', 'a12bc23de345f');
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 3);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 4);
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 5);
+SELECT splitByRegexp('', 'a12bc23de345f');
+SELECT splitByRegexp('', 'a12bc23de345f', -1);
+SELECT splitByRegexp('', 'a12bc23de345f', 0);
+SELECT splitByRegexp('', 'a12bc23de345f', 1);
+SELECT splitByRegexp('', 'a12bc23de345f', 2);
+SELECT splitByRegexp('', 'a12bc23de345f', 3);
+SELECT splitByRegexp('', 'a12bc23de345f', 4);
+SELECT splitByRegexp('', 'a12bc23de345f', 5);
+SELECT '-- (include remainder)';
+SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('', 'a12bc23de345f', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByRegexp('\\d+', 'a12bc23de345f', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+
+SELECT '-- splitByAlpha';
+SELECT '-- (default)';
+SELECT splitByAlpha('ab.cd.ef.gh');
+SELECT splitByAlpha('ab.cd.ef.gh', -1);
+SELECT splitByAlpha('ab.cd.ef.gh', 0);
+SELECT splitByAlpha('ab.cd.ef.gh', 1);
+SELECT splitByAlpha('ab.cd.ef.gh', 2);
+SELECT splitByAlpha('ab.cd.ef.gh', 3);
+SELECT splitByAlpha('ab.cd.ef.gh', 4);
+SELECT splitByAlpha('ab.cd.ef.gh', 5);
+SELECT '-- (include remainder)';
+SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByAlpha('ab.cd.ef.gh', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+
+SELECT '-- splitByNonAlpha';
+SELECT '-- (default)';
+SELECT splitByNonAlpha('128.0.0.1');
+SELECT splitByNonAlpha('128.0.0.1', -1);
+SELECT splitByNonAlpha('128.0.0.1', 0);
+SELECT splitByNonAlpha('128.0.0.1', 1);
+SELECT splitByNonAlpha('128.0.0.1', 2);
+SELECT splitByNonAlpha('128.0.0.1', 3);
+SELECT splitByNonAlpha('128.0.0.1', 4);
+SELECT splitByNonAlpha('128.0.0.1', 5);
+SELECT '-- (include remainder)';
+SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByNonAlpha('128.0.0.1', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+--
+--
+SELECT '-- splitByWhitespace';
+SELECT '-- (default)';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!');
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 3);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 4);
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 5);
+SELECT '-- (include remainder)';
+SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
+SELECT splitByWhitespace('Nein, nein, nein! Doch!', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
diff --git a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
deleted file mode 100644
index 9966c7d090e..00000000000
--- a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.reference
+++ /dev/null
@@ -1,126 +0,0 @@
--- splitByAlpha
-['ab','cd','ef','gh']
-['ab','cd','ef','gh']
-['ab','cd','ef','gh']
-['ab']
-['ab','cd']
-['ab','cd','ef','gh']
-['ab','cd','ef','gh']
-['ab.cd.ef.gh']
-['ab','cd.ef.gh']
-['ab','cd','ef.gh']
-['ab','cd','ef','gh']
-['ab','cd','ef','gh']
-['ab','cd','ef','gh']
-['ab.cd.ef.gh']
-['ab','cd.ef.gh']
--- splitByNonAlpha
-['128','0','0','1']
-['128','0','0','1']
-['128','0','0','1']
-['128']
-['128','0']
-['128','0','0','1']
-['128','0','0','1']
-['128.0.0.1']
-['128','0.0.1']
-['128','0','0.1']
-['128','0','0','1']
-['128','0','0','1']
-['128','0','0','1']
-['128.0.0.1']
-['128','0.0.1']
--- splitByWhitespace
-['Nein,','nein,','nein!','Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein,']
-['Nein,','nein,']
-['Nein,','nein,','nein!','Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein, nein, nein! Doch!']
-['Nein,','nein, nein! Doch!']
-['Nein,','nein,','nein! Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein,','nein,','nein!','Doch!']
-['Nein, nein, nein! Doch!']
-['Nein,','nein, nein! Doch!']
--- splitByChar
-['a','','b','c','d']
-['a','','b','c','d']
-['a','','b','c','d']
-['a']
-['a','']
-['a','','b','c','d']
-['a','','b','c','d']
-['a==b=c=d']
-['a','=b=c=d']
-['a','','b=c=d']
-['a','','b','c','d']
-['a','','b','c','d']
-['a','','b','c','d']
-['a==b=c=d']
-['a','=b=c=d']
--- splitByString
-['a','b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a']
-['a','b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a==b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a','b=c=d']
-['a==b=c=d']
-['a','b=c=d']
-['a','=','=','b','=','c','=','d']
-['a','=','=','b','=','c','=','d']
-['a','=','=','b','=','c','=','d']
-['a']
-['a','=']
-['a','=','=','b','=','c','=','d']
-['a','=','=','b','=','c','=','d']
-['a==b=c=d']
-['a','==b=c=d']
-['a','=','=b=c=d']
-['a','=','=','b','=','c','=','d']
-['a','=','=','b','=','c','=','d']
-['a','=','=','b','=','c','=','d']
-['a==b=c=d']
-['a','==b=c=d']
--- splitByRegexp
-['a','bc','de','f']
-['a','bc','de','f']
-['a','bc','de','f']
-['a']
-['a','bc']
-['a','bc','de','f']
-['a','bc','de','f']
-['a12bc23de345f']
-['a','bc23de345f']
-['a','bc','de345f']
-['a','bc','de','f']
-['a','bc','de','f']
-['a','bc','de','f']
-['a12bc23de345f']
-['a','bc23de345f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a']
-['a','1']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a12bc23de345f']
-['a','12bc23de345f']
-['a','1','2bc23de345f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a','1','2','b','c','2','3','d','e','3','4','5','f']
-['a12bc23de345f']
-['a','12bc23de345f']
diff --git a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql b/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql
deleted file mode 100644
index 1dcad65f09b..00000000000
--- a/tests/queries/0_stateless/02876_splitby_max_substring_behavior.sql
+++ /dev/null
@@ -1,151 +0,0 @@
-SELECT '-- splitByAlpha';
-SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT '-- splitByNonAlpha';
-SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT '-- splitByWhitespace';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT '-- splitByChar';
-SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT '-- splitByString';
-
-SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByString('==', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('==', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('==', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('==', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('==', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT '-- splitByRegexp';
-
-SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'spark';
-
-SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = '';
-SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = '';
-
-SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'python';
-SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'python';
-
-SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substring_behavior = 'spark';
-SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substring_behavior = 'spark';

From 003f5f77eb22731b86a316a2ab133fbfb920a84a Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Tue, 19 Sep 2023 15:01:28 +0200
Subject: [PATCH 124/243] refine error code of duplicated index in create query

---
 src/Interpreters/InterpreterCreateQuery.cpp            |  4 ++++
 .../0_stateless/02884_duplicate_index_name.reference   |  0
 .../queries/0_stateless/02884_duplicate_index_name.sql | 10 ++++++++++
 3 files changed, 14 insertions(+)
 create mode 100644 tests/queries/0_stateless/02884_duplicate_index_name.reference
 create mode 100644 tests/queries/0_stateless/02884_duplicate_index_name.sql

diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 1cd34c2a0f6..b426025413c 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -95,6 +95,7 @@ namespace ErrorCodes
     extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
     extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_INDEX;
     extern const int LOGICAL_ERROR;
     extern const int UNKNOWN_DATABASE;
     extern const int PATH_ACCESS_DENIED;
@@ -697,6 +698,8 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
             for (const auto & index : create.columns_list->indices->children)
             {
                 IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext());
+                if (properties.indices.has(index_desc.name))
+                    throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {}", backQuoteIfNeed(index_desc.name));
                 const auto & settings = getContext()->getSettingsRef();
                 if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index)
                 {
@@ -711,6 +714,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
 
                 properties.indices.push_back(index_desc);
             }
+
         if (create.columns_list->projections)
             for (const auto & projection_ast : create.columns_list->projections->children)
             {
diff --git a/tests/queries/0_stateless/02884_duplicate_index_name.reference b/tests/queries/0_stateless/02884_duplicate_index_name.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02884_duplicate_index_name.sql b/tests/queries/0_stateless/02884_duplicate_index_name.sql
new file mode 100644
index 00000000000..4647ab3702b
--- /dev/null
+++ b/tests/queries/0_stateless/02884_duplicate_index_name.sql
@@ -0,0 +1,10 @@
+DROP TABLE IF EXISTS test_dup_index;
+
+CREATE TABLE test_dup_index
+(
+	a Int64,
+	b Int64,
+	INDEX idx_a a TYPE minmax,
+	INDEX idx_a b TYPE minmax
+) Engine = MergeTree()
+ORDER BY a; -- { serverError 127 }

From 3e4eefc700d48284cd3c0bf788914dbcf7fd559a Mon Sep 17 00:00:00 2001
From: Han Fei <hanfei19910905@gmail.com>
Date: Tue, 19 Sep 2023 15:56:46 +0200
Subject: [PATCH 125/243] Update
 tests/queries/0_stateless/02884_duplicate_index_name.sql

Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
---
 tests/queries/0_stateless/02884_duplicate_index_name.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02884_duplicate_index_name.sql b/tests/queries/0_stateless/02884_duplicate_index_name.sql
index 4647ab3702b..4cd9ae6d2a2 100644
--- a/tests/queries/0_stateless/02884_duplicate_index_name.sql
+++ b/tests/queries/0_stateless/02884_duplicate_index_name.sql
@@ -7,4 +7,4 @@ CREATE TABLE test_dup_index
 	INDEX idx_a a TYPE minmax,
 	INDEX idx_a b TYPE minmax
 ) Engine = MergeTree()
-ORDER BY a; -- { serverError 127 }
+ORDER BY a; -- { serverError ILLEGAL_INDEX }

From c66b60f00f851670a36789a45254a4c07ee85e47 Mon Sep 17 00:00:00 2001
From: HarryLeeIBM <hleeatwork@outlook.com>
Date: Tue, 19 Sep 2023 07:12:14 -0700
Subject: [PATCH 126/243] Fix SimHash function issue for s390x

---
 src/Functions/FunctionsStringHash.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp
index ff8ff2d2651..0bf6e39e651 100644
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@@ -18,6 +18,10 @@
 #include "vec_crc32.h"
 #endif
 
+#if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+#include <crc32-s390x.h>
+#endif
+
 namespace DB
 {
 
@@ -43,7 +47,7 @@ struct Hash
 #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-        return s390x_crc32(crc, val);
+        return crc32c_le(static_cast<UInt32>(crc), reinterpret_cast<unsigned char *>(&val), sizeof(val));
 #else
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "String hash is not implemented without sse4.2 support");
 #endif
@@ -58,7 +62,7 @@ struct Hash
 #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-        return s390x_crc32_u32(crc, val);
+        return crc32c_le(static_cast<UInt32>(crc), reinterpret_cast<unsigned char *>(&val), sizeof(val));
 #else
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "String hash is not implemented without sse4.2 support");
 #endif
@@ -73,7 +77,7 @@ struct Hash
 #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-        return s390x_crc32_u16(crc, val);
+        return crc32c_le(static_cast<UInt32>(crc), reinterpret_cast<unsigned char *>(&val), sizeof(val));
 #else
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "String hash is not implemented without sse4.2 support");
 #endif
@@ -88,7 +92,7 @@ struct Hash
 #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val));
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-        return s390x_crc32_u8(crc, val);
+        return crc32c_le(static_cast<UInt32>(crc), reinterpret_cast<unsigned char *>(&val), sizeof(val));
 #else
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "String hash is not implemented without sse4.2 support");
 #endif

From 27d8eefe9376ba73ee8a898ec674d324b1a91d7c Mon Sep 17 00:00:00 2001
From: Maksim Kita <kitaetoya@gmail.com>
Date: Tue, 19 Sep 2023 17:14:32 +0300
Subject: [PATCH 127/243] Fixed tests

---
 src/DataTypes/Utils.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/DataTypes/Utils.cpp b/src/DataTypes/Utils.cpp
index 0168c6d256c..1c36b2dfc9d 100644
--- a/src/DataTypes/Utils.cpp
+++ b/src/DataTypes/Utils.cpp
@@ -15,10 +15,10 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ
     bool to_type_was_nullable = isNullableOrLowCardinalityNullable(to_type);
     auto to_type_unwrapped = removeNullable(removeLowCardinality(to_type));
 
-    if (from_type->equals(*to_type))
+    if (from_type->equals(*to_type_unwrapped))
         return true;
 
-    auto to_which_type = WhichDataType(to_type->getTypeId());
+    auto to_which_type = WhichDataType(to_type_unwrapped->getTypeId());
 
     switch (from_which_type.idx)
     {

From 51b95abc2b9f4e8577d4376129b5c0c7f6b3fbca Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Tue, 19 Sep 2023 17:19:11 +0200
Subject: [PATCH 128/243] Update Settings.h (#54754)

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 7a20c685b09..186d90d9f5d 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -796,7 +796,7 @@ class IColumn;
     M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
     M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
     M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
-    M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \
+    M(Bool, allow_experimental_undrop_table_query, true, "Allow to use undrop query to restore dropped table in a limited time", 0) \
     M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
     M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
     M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \

From b1b49f430af91868d9da8cb0a34aa5fa58e0bc3b Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Tue, 19 Sep 2023 15:32:58 +0000
Subject: [PATCH 129/243] review suggestions

---
 docker/images.json               |  1 +
 tests/ci/libfuzzer_test_check.py | 10 ++++------
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/docker/images.json b/docker/images.json
index bddfd49ea3b..d208ee9a888 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -125,6 +125,7 @@
          "name": "clickhouse/test-base",
          "dependent": [
             "docker/test/fuzzer",
+            "docker/test/libfuzzer",
             "docker/test/integration/base",
             "docker/test/keeper-jepsen",
             "docker/test/server-jepsen",
diff --git a/tests/ci/libfuzzer_test_check.py b/tests/ci/libfuzzer_test_check.py
index 8d307b22042..e768b7f1b4e 100644
--- a/tests/ci/libfuzzer_test_check.py
+++ b/tests/ci/libfuzzer_test_check.py
@@ -139,9 +139,8 @@ def main():
 
     docker_image = get_image_with_version(reports_path, "clickhouse/libfuzzer")
 
-    fuzzers_path = Path(temp_path) / "fuzzers"
-    if not fuzzers_path.exists():
-        os.makedirs(fuzzers_path)
+    fuzzers_path = temp_path / "fuzzers"
+    fuzzers_path.mkdir(parents=True, exist_ok=True)
 
     download_fuzzers(check_name, reports_path, fuzzers_path)
 
@@ -152,9 +151,8 @@ def main():
             corpus_path = fuzzers_path / (file.removesuffix("_seed_corpus.zip") + ".in")
             zipfile.ZipFile(fuzzers_path / file, "r").extractall(corpus_path)
 
-    result_path = Path(temp_path) / "result_path"
-    if not result_path.exists():
-        os.makedirs(result_path)
+    result_path = temp_path / "result_path"
+    result_path.mkdir(parents=True, exist_ok=True)
 
     run_log_path = result_path / "run.log"
 

From a15029ecb593e218cabeb3ae2af5d2afa3f22c6e Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 19 Sep 2023 16:29:02 +0000
Subject: [PATCH 130/243] Query Cache: Reject queries with non-deterministic
 functions by default

https://github.com/ClickHouse/support-escalation/issues/963
---
 docs/en/operations/query-cache.md                  |  4 ++--
 src/Common/ErrorCodes.cpp                          |  1 +
 src/Interpreters/executeQuery.cpp                  | 14 +++++++++-----
 ...uery_cache_nondeterministic_functions.reference |  1 -
 ...2494_query_cache_nondeterministic_functions.sql |  6 +++---
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
index bbde77338af..e111206355e 100644
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@@ -142,7 +142,7 @@ As a result, the query cache stores for each query multiple (partial)
 result blocks. While this behavior is a good default, it can be suppressed using setting
 [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results).
 
-Also, results of queries with non-deterministic functions are not cached. Such functions include
+Also, results of queries with non-deterministic functions are not cached by default. Such functions include
 - functions for accessing dictionaries: [`dictGet()`](../sql-reference/functions/ext-dict-functions.md#dictGet) etc.
 - [user-defined functions](../sql-reference/statements/create/function.md),
 - functions which return the current date or time: [`now()`](../sql-reference/functions/date-time-functions.md#now),
@@ -158,7 +158,7 @@ Also, results of queries with non-deterministic functions are not cached. Such f
 - functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser),
   [`queryID()`](../sql-reference/functions/other-functions.md#queryID),
   [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
-Caching of non-deterministic functions can be forced regardless using setting
+To force caching of results of queries with non-deterministic functionsregardless, using setting
 [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
 
 Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index f23685c37d1..ad34516b00e 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -585,6 +585,7 @@
     M(700, USER_SESSION_LIMIT_EXCEEDED)  \
     M(701, CLUSTER_DOESNT_EXIST) \
     M(702, CLIENT_INFO_DOES_NOT_MATCH) \
+    M(703, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 310af2f9812..9b24b5df9b2 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -94,11 +94,12 @@ namespace DB
 
 namespace ErrorCodes
 {
+    extern const int CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS;
     extern const int INTO_OUTFILE_NOT_ALLOWED;
-    extern const int QUERY_WAS_CANCELLED;
     extern const int INVALID_TRANSACTION;
     extern const int LOGICAL_ERROR;
     extern const int NOT_IMPLEMENTED;
+    extern const int QUERY_WAS_CANCELLED;
 }
 
 
@@ -991,7 +992,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
         if (!async_insert)
         {
-            /// If it is a non-internal SELECT, and passive/read use of the query cache is enabled, and the cache knows the query, then set
+            /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set
             /// a pipeline with a source populated by the query cache.
             auto get_result_from_query_cache = [&]()
             {
@@ -1091,11 +1092,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
                     res = interpreter->execute();
 
-                    /// If it is a non-internal SELECT query, and active/write use of the query cache is enabled, then add a processor on
+                    /// If it is a non-internal SELECT query, and active (write) use of the query cache is enabled, then add a processor on
                     /// top of the pipeline which stores the result in the query cache.
-                    if (can_use_query_cache && settings.enable_writes_to_query_cache
-                        && (!astContainsNonDeterministicFunctions(ast, context) || settings.query_cache_store_results_of_queries_with_nondeterministic_functions))
+                    if (can_use_query_cache && settings.enable_writes_to_query_cache)
                     {
+                        if (astContainsNonDeterministicFunctions(ast, context) && !settings.query_cache_store_results_of_queries_with_nondeterministic_functions)
+                            throw Exception(ErrorCodes::CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS,
+                                "Unable to cache the query result because the query contains a non-deterministic function. Use setting query_cache_store_results_of_queries_with_nondeterministic_functions = 1 to store the query result regardless.");
+
                         QueryCache::Key key(
                             ast, res.pipeline.getHeader(),
                             context->getUserName(), settings.query_cache_share_between_users,
diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference
index cb6165c307a..e666f54d4c4 100644
--- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference
+++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference
@@ -1,4 +1,3 @@
-1
 0
 ---
 1
diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
index 045b7258a34..3a2e24d6bfe 100644
--- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
+++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
@@ -3,13 +3,13 @@
 
 SYSTEM DROP QUERY CACHE;
 
--- rand() is non-deterministic, with default settings no entry in the query cache should be created
-SELECT COUNT(rand(1)) SETTINGS use_query_cache = true;
+-- rand() is non-deterministic, the query is rejected by default
+SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError 703 }
 SELECT COUNT(*) FROM system.query_cache;
 
 SELECT '---';
 
--- But an entry can be forced using a setting
+-- Force caching using a setting
 SELECT COUNT(RAND(1)) SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true;
 SELECT COUNT(*) FROM system.query_cache;
 

From accbe24e08a92254c062f46d57e72a700334f10e Mon Sep 17 00:00:00 2001
From: alesapin <alesapin@gmail.com>
Date: Tue, 19 Sep 2023 18:42:46 +0200
Subject: [PATCH 131/243] Fix test

---
 tests/integration/test_check_table/test.py | 33 ++++++++--------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/tests/integration/test_check_table/test.py b/tests/integration/test_check_table/test.py
index 613ac3fb35f..99a5846d4ee 100644
--- a/tests/integration/test_check_table/test.py
+++ b/tests/integration/test_check_table/test.py
@@ -109,21 +109,15 @@ def test_check_normal_table_corruption(started_cluster):
 
     corrupt_data_part_on_disk(node1, "non_replicated_mt", "201902_1_1_0")
 
-    assert (
-        node1.query(
-            "CHECK TABLE non_replicated_mt",
-            settings={"check_query_single_value_result": 0},
-        ).strip()
-        == "201902_1_1_0\t0\tCannot read all data. Bytes read: 2. Bytes expected: 25."
-    )
+    assert node1.query(
+        "CHECK TABLE non_replicated_mt",
+        settings={"check_query_single_value_result": 0},
+    ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"]
 
-    assert (
-        node1.query(
-            "CHECK TABLE non_replicated_mt",
-            settings={"check_query_single_value_result": 0},
-        ).strip()
-        == "201902_1_1_0\t0\tCannot read all data. Bytes read: 2. Bytes expected: 25."
-    )
+    assert node1.query(
+        "CHECK TABLE non_replicated_mt",
+        settings={"check_query_single_value_result": 0},
+    ).strip().split("\t")[0:2] == ["201902_1_1_0", "0"]
 
     node1.query(
         "INSERT INTO non_replicated_mt VALUES (toDate('2019-01-01'), 1, 10), (toDate('2019-01-01'), 2, 12)"
@@ -141,13 +135,10 @@ def test_check_normal_table_corruption(started_cluster):
 
     remove_checksums_on_disk(node1, "non_replicated_mt", "201901_2_2_0")
 
-    assert (
-        node1.query(
-            "CHECK TABLE non_replicated_mt PARTITION 201901",
-            settings={"check_query_single_value_result": 0},
-        )
-        == "201901_2_2_0\t0\tCheck of part finished with error: \\'Cannot read all data. Bytes read: 2. Bytes expected: 25.\\'\n"
-    )
+    assert node1.query(
+        "CHECK TABLE non_replicated_mt PARTITION 201901",
+        settings={"check_query_single_value_result": 0},
+    ).strip().split("\t")[0:2] == ["201901_2_2_0", "0"]
 
 
 def test_check_replicated_table_simple(started_cluster):

From 5111f1e0901f13a758408a7aee39baba586eeb29 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 19 Sep 2023 18:59:00 +0200
Subject: [PATCH 132/243] Update docs/en/operations/query-cache.md

Co-authored-by: Nikita Taranov <nickita.taranov@gmail.com>
---
 docs/en/operations/query-cache.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
index e111206355e..6e21b0b3658 100644
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@@ -158,7 +158,7 @@ Also, results of queries with non-deterministic functions are not cached by defa
 - functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser),
   [`queryID()`](../sql-reference/functions/other-functions.md#queryID),
   [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
-To force caching of results of queries with non-deterministic functionsregardless, using setting
+To force caching of results of queries with non-deterministic functions regardless, use setting
 [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
 
 Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a

From c439c4bca212a784c752f791b146f71de8fa883a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 19 Sep 2023 21:39:21 +0200
Subject: [PATCH 133/243] Revert "Fix filtering parts with indexHint for non
 analyzer"

---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp           | 5 +----
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp   | 3 ---
 tests/analyzer_tech_debt.txt                             | 1 -
 .../0_stateless/02880_indexHint__partition_id.reference  | 9 ---------
 .../0_stateless/02880_indexHint__partition_id.sql        | 9 ---------
 5 files changed, 1 insertion(+), 26 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.reference
 delete mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.sql

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 237a4cc703f..a2f2c1e0aac 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1346,10 +1346,7 @@ static void buildIndexes(
     }
 
     /// TODO Support row_policy_filter and additional_filters
-    if (settings.allow_experimental_analyzer)
-        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
-    else
-        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context);
+    indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
 
     indexes->use_skip_indexes = settings.use_skip_indexes;
     bool final = query_info.isFinal();
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index d7c6c370c18..3c2b09b6f3b 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -815,9 +815,6 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
     ASTPtr expression_ast;
     auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
-    if (virtual_columns_block.rows() == 0)
-        return {};
-
     // Generate valid expressions for filtering
     VirtualColumnUtils::prepareFilterBlockWithQuery(query, context, virtual_columns_block, expression_ast);
 
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 652ab0b99de..4419190e12c 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -54,7 +54,6 @@
 01710_projection_additional_filters
 01721_join_implicit_cast_long
 01739_index_hint
-02880_indexHint__partition_id
 01747_join_view_filter_dictionary
 01748_partition_id_pruning
 01756_optimize_skip_unused_shards_rewrite_in
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.reference b/tests/queries/0_stateless/02880_indexHint__partition_id.reference
deleted file mode 100644
index 365e7b676c7..00000000000
--- a/tests/queries/0_stateless/02880_indexHint__partition_id.reference
+++ /dev/null
@@ -1,9 +0,0 @@
--- { echoOn }
-select * from data prewhere indexHint(_partition_id = '1');
-1
-select count() from data prewhere indexHint(_partition_id = '1');
-1
-select * from data where indexHint(_partition_id = '1');
-1
-select count() from data where indexHint(_partition_id = '1');
-1
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.sql b/tests/queries/0_stateless/02880_indexHint__partition_id.sql
deleted file mode 100644
index d15b3f4ccea..00000000000
--- a/tests/queries/0_stateless/02880_indexHint__partition_id.sql
+++ /dev/null
@@ -1,9 +0,0 @@
-drop table if exists data;
-create table data (part Int) engine=MergeTree() order by tuple() partition by part;
-insert into data values (1)(2);
-
--- { echoOn }
-select * from data prewhere indexHint(_partition_id = '1');
-select count() from data prewhere indexHint(_partition_id = '1');
-select * from data where indexHint(_partition_id = '1');
-select count() from data where indexHint(_partition_id = '1');

From 4d46753541e5c6b7e40815c7b69b178a41ecbb5f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 18 Sep 2023 13:02:17 +0200
Subject: [PATCH 134/243] Convert shutdown_wait_unfinished into ServerSettings

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 programs/server/Server.cpp | 4 ++--
 src/Core/ServerSettings.h  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index d543d16b146..a968acb549f 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1602,7 +1602,7 @@ try
                 LOG_INFO(log, "Closed all listening sockets.");
 
             if (current_connections > 0)
-                current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, config().getInt("shutdown_wait_unfinished", 5));
+                current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished);
 
             if (current_connections)
                 LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
@@ -1909,7 +1909,7 @@ try
                 global_context->getProcessList().killAllQueries();
 
             if (current_connections)
-                current_connections = waitServersToFinish(servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5));
+                current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished);
 
             if (current_connections)
                 LOG_WARNING(log, "Closed connections. But {} remain."
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 7e346f3596c..f0a62029de3 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -16,6 +16,7 @@ namespace DB
 #define SERVER_SETTINGS(M, ALIAS) \
     M(Bool, show_addresses_in_stack_traces, true, "If it is set true will show addresses in stack traces", 0) \
     M(Bool, shutdown_wait_unfinished_queries, false, "If set true ClickHouse will wait for running queries finish before shutdown.", 0) \
+    M(UInt64, shutdown_wait_unfinished, 5, "Delay in seconds to wait for unfinished queries", 0) \
     M(UInt64, max_thread_pool_size, 10000, "The maximum number of threads that could be allocated from the OS and used for query execution and background operations.", 0) \
     M(UInt64, max_thread_pool_free_size, 1000, "The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks.", 0) \
     M(UInt64, thread_pool_queue_size, 10000, "The maximum number of tasks that will be placed in a queue and wait for execution.", 0) \

From dde0a88067e6b962ceaa73d7a6021e2d8a411e60 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 18 Sep 2023 13:02:33 +0200
Subject: [PATCH 135/243] Document
 shutdown_wait_unfinished/shutdown_wait_unfinished_queries in config.xml

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 programs/server/config.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/programs/server/config.xml b/programs/server/config.xml
index 39ad1f82eed..07427c2851a 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1493,4 +1493,10 @@
     <!-- On Linux systems this can control the behavior of OOM killer.
     <oom_score>-1000</oom_score>
     -->
+
+    <!-- Delay (in seconds) to wait for unfinished queries before force exit -->
+    <!-- <shutdown_wait_unfinished>5</shutdown_wait_unfinished> -->
+
+    <!-- If set true ClickHouse will wait for running queries finish before shutdown. -->
+    <!-- <shutdown_wait_unfinished_queries>false</shutdown_wait_unfinished_queries> -->
 </clickhouse>

From 4c301565952d48be5eebc18f00dfbb7543c129f6 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Mon, 18 Sep 2023 12:56:15 +0200
Subject: [PATCH 136/243] Disable killing the server by systemd (that may lead
 to data loss for Buffer)

Default systemd's timeout for sending SIGKILL after SIGTERM is 1m30s
(TimeoutStopSec), which is can be not enough to wait for queries or
shutdown the storages.

And besides in this case shutdown_wait_unfinished server settings are
ignored.

So let's just disable this systemd logic and rely on
shutdown_wait_unfinished instead.

But note shutting down the storages can take a while, but it is better
to give it time instead of killing the process, since killing may lead
to data loss.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 packages/clickhouse-server.service | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index 42dc5bd380d..c2ef7c2746d 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -17,6 +17,10 @@ User=clickhouse
 Group=clickhouse
 Restart=always
 RestartSec=30
+# The following ClickHouse directives should be used instead of forcing SIGKILL by systemd:
+# - shutdown_wait_unfinished_queries
+# - shutdown_wait_unfinished
+TimeoutStopSec=infinity
 # Since ClickHouse is systemd aware default 1m30sec may not be enough
 TimeoutStartSec=0
 # %p is resolved to the systemd unit name

From 5e90cd413b3aa7d2d2a89e0ac1faf2f7c737d34a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 19 Sep 2023 19:40:13 +0200
Subject: [PATCH 137/243] Add ability to disable forwarding signals to the
 child by watchdog

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Daemon/BaseDaemon.cpp | 59 ++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 19 deletions(-)

diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index be323dc6786..3b2fdcd95ca 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -181,6 +181,15 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     errno = saved_errno;
 }
 
+static bool getenvBool(const char * name)
+{
+    bool res = false;
+    const char * env_var = getenv(name); // NOLINT(concurrency-mt-unsafe)
+    if (env_var && 0 == strcmp(env_var, "1"))
+        res = true;
+    return res;
+}
+
 
 /// Avoid link time dependency on DB/Interpreters - will use this function only when linked.
 __attribute__((__weak__)) void collectCrashLog(
@@ -1110,10 +1119,8 @@ void BaseDaemon::setupWatchdog()
     if (argv0)
         original_process_name = argv0;
 
-    bool restart = false;
-    const char * env_watchdog_restart = getenv("CLICKHOUSE_WATCHDOG_RESTART"); // NOLINT(concurrency-mt-unsafe)
-    if (env_watchdog_restart && 0 == strcmp(env_watchdog_restart, "1"))
-        restart = true;
+    bool restart = getenvBool("CLICKHOUSE_WATCHDOG_RESTART");
+    bool forward_signals = !getenvBool("CLICKHOUSE_WATCHDOG_NO_FORWARD");
 
     while (true)
     {
@@ -1194,23 +1201,37 @@ void BaseDaemon::setupWatchdog()
         logger().information(fmt::format("Will watch for the process with pid {}", pid));
 
         /// Forward signals to the child process.
-        addSignalHandler(
-            {SIGHUP, SIGINT, SIGQUIT, SIGTERM},
-            [](int sig, siginfo_t *, void *)
-            {
-                /// Forward all signals except INT as it can be send by terminal to the process group when user press Ctrl+C,
-                /// and we process double delivery of this signal as immediate termination.
-                if (sig == SIGINT)
-                    return;
-
-                const char * error_message = "Cannot forward signal to the child process.\n";
-                if (0 != ::kill(pid, sig))
+        if (forward_signals)
+        {
+            addSignalHandler(
+                {SIGHUP, SIGINT, SIGQUIT, SIGTERM},
+                [](int sig, siginfo_t *, void *)
                 {
-                    auto res = write(STDERR_FILENO, error_message, strlen(error_message));
-                    (void)res;
+                    /// Forward all signals except INT as it can be send by terminal to the process group when user press Ctrl+C,
+                    /// and we process double delivery of this signal as immediate termination.
+                    if (sig == SIGINT)
+                        return;
+
+                    const char * error_message = "Cannot forward signal to the child process.\n";
+                    if (0 != ::kill(pid, sig))
+                    {
+                        auto res = write(STDERR_FILENO, error_message, strlen(error_message));
+                        (void)res;
+                    }
+                },
+                nullptr);
+        }
+        else
+        {
+            for (const auto & sig : {SIGHUP, SIGINT, SIGQUIT, SIGTERM})
+            {
+                if (SIG_ERR == signal(sig, SIG_IGN))
+                {
+                    char * signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe)
+                    throwFromErrno(fmt::format("Cannot ignore {}", signal_description), ErrorCodes::SYSTEM_ERROR);
                 }
-            },
-            nullptr);
+            }
+        }
 
         int status = 0;
         do

From 630eddbbbcf75aedaf89d3acbeb67ff8fc99d1e8 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Tue, 19 Sep 2023 19:43:10 +0200
Subject: [PATCH 138/243] Disable forwarding signals by watchdog in systemd
 service

With default KillMode=control-group, systemd will send signals to all
processes in cgroup and this will lead to server will be terminated
forcefully due to double signal.

    2023.09.19 12:47:06.369090 [ 763 ] {} <Information> Application: Received termination signal (Terminated)
    2023.09.19 12:47:06.369141 [ 762 ] {} <Debug> Application: Received termination signal.
    2023.09.19 12:47:06.369215 [ 763 ] {} <Information> Application: Received termination signal (Terminated)
    2023.09.19 12:47:06.369225 [ 763 ] {} <Information> Application: This is the second termination signal. Immediately terminate.
    2023.09.19 12:47:06.400959 [ 761 ] {} <Information> Application: Child process exited normally with code 143.

Someone may naively think that, hey, I can change KillMode to
process/mixed, but this will not work either, because in this case
systemd cannot wait for the $MainPID (and main_pid_alien=true in
systemd's sources), because it is not a child of systemd, and this will
lead to double signal again:

    2023.09.19 16:24:19.694473 [ 3118 ] {} <Information> Application: Received termination signal (Terminated)
    2023.09.19 16:24:19.694894 [ 3118 ] {} <Information> Application: Received termination signal (Terminated)
    2023.09.19 16:24:19.695060 [ 3118 ] {} <Information> Application: This is the second termination signal. Immediately terminate.

And this is because it sends signal firstly on a normal termnation and
then when it cleans up left over processes:

    clickhouse-server.service: Found left-over process 3117 (clickhouse-serv) in control group while starting unit. Ignoring.

And yes, even though it prints "Ignoring" here (I guess it is related to
the fact that it can be ignored if the signal will not be handled)

Here is a proof of double signal by systemd:

    # pgrep clickhouse-serv | xargs strace  -e /kill -fp
    strace: Process 3117 attached with 469 threads
    [pid  3582] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=1, si_uid=0} ---
    [pid  3580] --- SIGCONT {si_signo=SIGCONT, si_code=SI_USER, si_pid=1, si_uid=0} ---
    [pid  3582] --- SIGCONT {si_signo=SIGCONT, si_code=SI_USER, si_pid=1, si_uid=0} ---
    [pid  3580] --- SIGTERM {si_signo=SIGTERM, si_code=SI_USER, si_pid=1, si_uid=0} ---
                                                                      ^^^
    [pid  3118] tgkill(3117, 3118, SIGTERM) = 0 # and this is a force termination

So yes, there is no other way except for disabling signal forwarding.

*Well, there is another way, but I guess it is will be unwelcome (even
though systemd can be configured in multiple ways right now, and there
is even systemd-oomd instead of clickhouse'es watchdog) - disable
watchdog completelly.*

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 packages/clickhouse-server.service | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index c2ef7c2746d..9a7d07e5cee 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -21,6 +21,9 @@ RestartSec=30
 # - shutdown_wait_unfinished_queries
 # - shutdown_wait_unfinished
 TimeoutStopSec=infinity
+# Disable forwarding signals by watchdog, since with default systemd's
+# kill-mode control-group, systemd will send signal to all process in cgroup.
+Environment=CLICKHOUSE_WATCHDOG_NO_FORWARD=1
 # Since ClickHouse is systemd aware default 1m30sec may not be enough
 TimeoutStartSec=0
 # %p is resolved to the systemd unit name

From fa51399ede28f551a62061d6961032d1c99e763a Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Tue, 19 Sep 2023 21:39:59 +0000
Subject: [PATCH 139/243] Make clickhouse-local logging (server_logs_file)
 prepend timestamps etc

---
 programs/local/LocalServer.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index d56e7d8b47c..9fb629a0871 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -32,6 +32,8 @@
 #include <Common/randomSeed.h>
 #include <Common/ThreadPool.h>
 #include <Loggers/Loggers.h>
+#include <Loggers/OwnFormattingChannel.h>
+#include <Loggers/OwnPatternFormatter.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/WriteBufferFromFileDescriptor.h>
@@ -599,7 +601,9 @@ void LocalServer::processConfig()
     {
         auto poco_logs_level = Poco::Logger::parseLevel(level);
         Poco::Logger::root().setLevel(poco_logs_level);
-        Poco::Logger::root().setChannel(Poco::AutoPtr<Poco::SimpleFileChannel>(new Poco::SimpleFileChannel(server_logs_file)));
+        Poco::AutoPtr<OwnPatternFormatter> pf = new OwnPatternFormatter;
+        Poco::AutoPtr<OwnFormattingChannel> log = new OwnFormattingChannel(pf, new Poco::SimpleFileChannel(server_logs_file));
+        Poco::Logger::root().setChannel(log);
         logging_initialized = true;
     }
     else if (logging || is_interactive)

From 636fc506aa791c30116d1c9bb42cd046405165e6 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Tue, 19 Sep 2023 23:42:40 +0200
Subject: [PATCH 140/243] Replace information_schema view columns aliases with
 uppercase

---
 .../system-tables/information_schema.md       | 120 +++++++++++++++---
 .../System/InformationSchema/columns.sql      |  79 ++++++++----
 .../InformationSchema/key_column_usage.sql    |  61 +++++----
 .../referential_constraints.sql               |  56 ++++----
 .../System/InformationSchema/schemata.sql     |  35 +++--
 .../System/InformationSchema/tables.sql       |  30 +++--
 .../System/InformationSchema/views.sql        |  32 +++--
 .../01161_information_schema.reference        |  50 +++++---
 .../0_stateless/01161_information_schema.sql  | 117 ++++++++++++++++-
 9 files changed, 430 insertions(+), 150 deletions(-)

diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index d9fcb544ebb..bca37b47312 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -38,7 +38,7 @@ SHOW TABLES FROM information_schema;
 - [KEY_COLUMN_USAGE](#key_column_usage)
 - [REFERENTIAL_CONSTRAINTS](#referential_constraints)
 
-Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases.
+Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases. The same applies to all the columns in these views - both lowercase (for example, `table_name`) and uppercase (`TABLE_NAME`) variants are provided.
 
 ## COLUMNS {#columns}
 
@@ -75,7 +75,36 @@ Columns:
 Query:
 
 ``` sql
-SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
+SELECT table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       column_default,
+       is_nullable,
+       data_type,
+       character_maximum_length,
+       character_octet_length,
+       numeric_precision,
+       numeric_precision_radix,
+       numeric_scale,
+       datetime_precision,
+       character_set_catalog,
+       character_set_schema,
+       character_set_name,
+       collation_catalog,
+       collation_schema,
+       collation_name,
+       domain_catalog,
+       domain_schema,
+       domain_name,
+       column_comment,
+       column_type
+FROM INFORMATION_SCHEMA.COLUMNS
+WHERE (table_schema = currentDatabase() OR table_schema = '')
+  AND table_name NOT LIKE '%inner%' 
+LIMIT 1 
+FORMAT Vertical;
 ```
 
 Result:
@@ -127,7 +156,17 @@ Columns:
 Query:
 
 ``` sql
-SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical;
+SELECT catalog_name,
+       schema_name,
+       schema_owner,
+       default_character_set_catalog,
+       default_character_set_schema,
+       default_character_set_name,
+       sql_path
+FROM information_schema.schemata
+WHERE schema_name ilike 'information_schema' 
+LIMIT 1 
+FORMAT Vertical;
 ```
 
 Result:
@@ -167,7 +206,17 @@ Columns:
 Query:
 
 ``` sql
-SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
+SELECT table_catalog, 
+       table_schema, 
+       table_name, 
+       table_type, 
+       table_collation, 
+       table_comment
+FROM INFORMATION_SCHEMA.TABLES
+WHERE (table_schema = currentDatabase() OR table_schema = '')
+  AND table_name NOT LIKE '%inner%'
+LIMIT 1 
+FORMAT Vertical;
 ```
 
 Result:
@@ -175,10 +224,12 @@ Result:
 ``` text
 Row 1:
 ──────
-table_catalog: default
-table_schema:  default
-table_name:    describe_example
-table_type:    BASE TABLE
+table_catalog:   default
+table_schema:    default
+table_name:      describe_example
+table_type:      BASE TABLE
+table_collation: utf8mb4_0900_ai_ci
+table_comment:   
 ```
 
 ## VIEWS {#views}
@@ -207,7 +258,20 @@ Query:
 ``` sql
 CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t;
 CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one;
-SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical;
+SELECT table_catalog,
+       table_schema,
+       table_name,
+       view_definition,
+       check_option,
+       is_updatable,
+       is_insertable_into,
+       is_trigger_updatable,
+       is_trigger_deletable,
+       is_trigger_insertable_into
+FROM information_schema.views
+WHERE table_schema = currentDatabase() 
+LIMIT 1
+FORMAT Vertical;
 ```
 
 Result:
@@ -240,7 +304,7 @@ Columns:
 - `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the table belongs.
 - `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table that has the constraint.
 - `column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column that has the constraint.
-- `ordinal_position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The column's position within the constraint, not the column's position within the table. Column positions are numbered beginning with 1.
+- `ordinal_position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Currently unused. Always `1`.
 - `position_in_unique_constraint` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Currently unused. Always `NULL`.
 - `referenced_table_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL.
 - `referenced_table_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL.
@@ -250,7 +314,21 @@ Columns:
 
 ```sql
 CREATE TABLE test (i UInt32, s String) ENGINE MergeTree ORDER BY i;
-SELECT * FROM information_schema.key_column_usage WHERE table_name = 'test' FORMAT Vertical;
+SELECT constraint_catalog,
+       constraint_schema,
+       constraint_name,
+       table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       position_in_unique_constraint,
+       referenced_table_schema,
+       referenced_table_name,
+       referenced_column_name
+FROM information_schema.key_column_usage 
+WHERE table_name = 'test' 
+FORMAT Vertical;
 ```
 
 Result:
@@ -258,14 +336,18 @@ Result:
 ```
 Row 1:
 ──────
-referenced_table_schema: ᴺᵁᴸᴸ
-referenced_table_name:   ᴺᵁᴸᴸ
-referenced_column_name:  ᴺᵁᴸᴸ
-table_schema:            default
-table_name:              test
-column_name:             i
-ordinal_position:        1
-constraint_name:         PRIMARY
+constraint_catalog:            def
+constraint_schema:             default
+constraint_name:               PRIMARY
+table_catalog:                 def
+table_schema:                  default
+table_name:                    test
+column_name:                   i
+ordinal_position:              1
+position_in_unique_constraint: ᴺᵁᴸᴸ
+referenced_table_schema:       ᴺᵁᴸᴸ
+referenced_table_name:         ᴺᵁᴸᴸ
+referenced_column_name:        ᴺᵁᴸᴸ
 ```
 
 ## REFERENTIAL_CONSTRAINTS (#referential_constraints)
diff --git a/src/Storages/System/InformationSchema/columns.sql b/src/Storages/System/InformationSchema/columns.sql
index b01352145ff..f95899d1cbf 100644
--- a/src/Storages/System/InformationSchema/columns.sql
+++ b/src/Storages/System/InformationSchema/columns.sql
@@ -3,8 +3,6 @@ ATTACH VIEW columns
     `table_catalog` String,
     `table_schema` String,
     `table_name` String,
-    `TABLE_SCHEMA` String,
-    `TABLE_NAME` String,
     `column_name` String,
     `ordinal_position` UInt64,
     `column_default` String,
@@ -27,36 +25,36 @@ ATTACH VIEW columns
     `domain_name` Nullable(String),
     `column_comment` String,
     `column_type` String,
-    `TABLE_CATALOG` String ALIAS table_catalog,
-    `COLUMN_NAME` String ALIAS column_name,
-    `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,
-    `COLUMN_DEFAULT` String ALIAS column_default,
-    `IS_NULLABLE` String ALIAS is_nullable,
-    `DATA_TYPE` String ALIAS data_type,
-    `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,
-    `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,
-    `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,
-    `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,
-    `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,
-    `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,
-    `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,
-    `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,
-    `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,
-    `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,
-    `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,
-    `COLLATION_NAME` Nullable(String) ALIAS collation_name,
-    `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,
-    `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,
-    `DOMAIN_NAME` Nullable(String) ALIAS domain_name,
-    `COLUMN_COMMENT` String ALIAS column_comment,
-    `COLUMN_TYPE` String ALIAS column_type
+    `TABLE_CATALOG` String,
+    `TABLE_SCHEMA` String,
+    `TABLE_NAME` String,
+    `COLUMN_NAME` String,
+    `ORDINAL_POSITION` UInt64,
+    `COLUMN_DEFAULT` String,
+    `IS_NULLABLE` String,
+    `DATA_TYPE` String,
+    `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),
+    `CHARACTER_OCTET_LENGTH` Nullable(UInt64),
+    `NUMERIC_PRECISION` Nullable(UInt64),
+    `NUMERIC_PRECISION_RADIX` Nullable(UInt64),
+    `NUMERIC_SCALE` Nullable(UInt64),
+    `DATETIME_PRECISION` Nullable(UInt64),
+    `CHARACTER_SET_CATALOG` Nullable(String),
+    `CHARACTER_SET_SCHEMA` Nullable(String),
+    `CHARACTER_SET_NAME` Nullable(String),
+    `COLLATION_CATALOG` Nullable(String),
+    `COLLATION_SCHEMA` Nullable(String),
+    `COLLATION_NAME` Nullable(String),
+    `DOMAIN_CATALOG` Nullable(String),
+    `DOMAIN_SCHEMA` Nullable(String),
+    `DOMAIN_NAME` Nullable(String),
+    `COLUMN_COMMENT` String,
+    `COLUMN_TYPE` String
 ) AS
 SELECT
     database AS table_catalog,
     database AS table_schema,
-    database AS TABLE_SCHEMA,
     table AS table_name,
-    table AS TABLE_NAME,
     name AS column_name,
     position AS ordinal_position,
     default_expression AS column_default,
@@ -78,5 +76,30 @@ SELECT
     NULL AS domain_schema,
     NULL AS domain_name,
     comment AS column_comment,
-    type AS column_type
+    type AS column_type,
+    table_catalog AS TABLE_CATALOG,
+    table_schema AS TABLE_SCHEMA,
+    table_name AS TABLE_NAME,
+    column_name AS COLUMN_NAME,
+    ordinal_position AS ORDINAL_POSITION,
+    column_default AS COLUMN_DEFAULT,
+    is_nullable AS IS_NULLABLE,
+    data_type AS DATA_TYPE,
+    character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,
+    character_octet_length AS CHARACTER_OCTET_LENGTH,
+    numeric_precision AS NUMERIC_PRECISION,
+    numeric_precision_radix AS NUMERIC_PRECISION_RADIX,
+    numeric_scale AS NUMERIC_SCALE,
+    datetime_precision AS DATETIME_PRECISION,
+    character_set_catalog AS CHARACTER_SET_CATALOG,
+    character_set_schema AS CHARACTER_SET_SCHEMA,
+    character_set_name AS CHARACTER_SET_NAME,
+    collation_catalog AS COLLATION_CATALOG,
+    collation_schema AS COLLATION_SCHEMA,
+    collation_name AS COLLATION_NAME,
+    domain_catalog AS DOMAIN_CATALOG,
+    domain_schema AS DOMAIN_SCHEMA,
+    domain_name AS DOMAIN_NAME,
+    column_comment AS COLUMN_COMMENT,
+    column_type AS COLUMN_TYPE
 FROM system.columns
diff --git a/src/Storages/System/InformationSchema/key_column_usage.sql b/src/Storages/System/InformationSchema/key_column_usage.sql
index bd8ad27f567..32152886706 100644
--- a/src/Storages/System/InformationSchema/key_column_usage.sql
+++ b/src/Storages/System/InformationSchema/key_column_usage.sql
@@ -12,30 +12,43 @@ ATTACH VIEW key_column_usage
      `referenced_table_schema` Nullable(String),
      `referenced_table_name` Nullable(String),
      `referenced_column_name` Nullable(String),
-     `CONSTRAINT_CATALOG` Nullable(String) ALIAS constraint_catalog, 
-     `CONSTRAINT_SCHEMA` Nullable(String) ALIAS constraint_schema,
-     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
-     `TABLE_CATALOG` String ALIAS table_catalog,
-     `TABLE_SCHEMA` String ALIAS table_schema,
-     `TABLE_NAME` String ALIAS table_name,
-     `COLUMN_NAME` Nullable(String) ALIAS column_name,
-     `ORDINAL_POSITION` UInt32 ALIAS ordinal_position,
-     `POSITION_IN_UNIQUE_CONSTRAINT` Nullable(UInt32) ALIAS position_in_unique_constraint,
-     `REFERENCED_TABLE_SCHEMA` Nullable(String) ALIAS referenced_table_schema,
-     `REFERENCED_TABLE_NAME` Nullable(String) ALIAS referenced_table_name,
-     `REFERENCED_COLUMN_NAME` Nullable(String) ALIAS referenced_column_name
+     `CONSTRAINT_CATALOG` Nullable(String), 
+     `CONSTRAINT_SCHEMA` Nullable(String),
+     `CONSTRAINT_NAME` Nullable(String),
+     `TABLE_CATALOG` String,
+     `TABLE_SCHEMA` String,
+     `TABLE_NAME` String,
+     `COLUMN_NAME` Nullable(String),
+     `ORDINAL_POSITION` UInt32,
+     `POSITION_IN_UNIQUE_CONSTRAINT` Nullable(UInt32),
+     `REFERENCED_TABLE_SCHEMA` Nullable(String),
+     `REFERENCED_TABLE_NAME` Nullable(String),
+     `REFERENCED_COLUMN_NAME` Nullable(String)
 ) AS
-SELECT 'def'     AS `constraint_catalog`,
-       database  AS `constraint_schema`,
-       'PRIMARY' AS `constraint_name`,
-       'def'     AS `table_catalog`,
-       database  AS `table_schema`,
-       table     AS `table_name`,
-       name      AS `column_name`,
-       position  AS `ordinal_position`,
-       NULL      AS `position_in_unique_constraint`,
-       NULL      AS `referenced_table_schema`,
-       NULL      AS `referenced_table_name`,
-       NULL      AS `referenced_column_name`
+SELECT 
+    'def'                         AS constraint_catalog,
+    database                      AS constraint_schema,
+    'PRIMARY'                     AS constraint_name,
+    'def'                         AS table_catalog,
+    database                      AS table_schema,
+    table                         AS table_name,
+    name                          AS column_name,
+    1                             AS ordinal_position,
+    NULL                          AS position_in_unique_constraint,
+    NULL                          AS referenced_table_schema,
+    NULL                          AS referenced_table_name,
+    NULL                          AS referenced_column_name,
+    constraint_catalog            AS CONSTRAINT_CATALOG,
+    constraint_schema             AS CONSTRAINT_SCHEMA,
+    constraint_name               AS CONSTRAINT_NAME,
+    table_catalog                 AS TABLE_CATALOG,
+    table_schema                  AS TABLE_SCHEMA,
+    table_name                    AS TABLE_NAME,
+    column_name                   AS COLUMN_NAME,
+    ordinal_position              AS ORDINAL_POSITION,
+    position_in_unique_constraint AS POSITION_IN_UNIQUE_CONSTRAINT,
+    referenced_table_schema       AS REFERENCED_TABLE_SCHEMA,
+    referenced_table_name         AS REFERENCED_TABLE_NAME,
+    referenced_column_name        AS REFERENCED_COLUMN_NAME
 FROM system.columns
 WHERE is_in_primary_key;
diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql
index b33b5a80ec7..9722db92050 100644
--- a/src/Storages/System/InformationSchema/referential_constraints.sql
+++ b/src/Storages/System/InformationSchema/referential_constraints.sql
@@ -11,27 +11,39 @@ ATTACH VIEW referential_constraints
      `delete_rule` String,
      `table_name` String,
      `referenced_table_name` String,
-     `CONSTRAINT_CATALOG` String ALIAS constraint_catalog,
-     `CONSTRAINT_SCHEMA` String ALIAS constraint_schema,
-     `CONSTRAINT_NAME` Nullable(String) ALIAS constraint_name,
-     `UNIQUE_CONSTRAINT_CATALOG` String ALIAS unqiue_constraint_catalog,
-     `UNIQUE_CONSTRAINT_SCHEMA` String ALIAS unqiue_constraint_schema,
-     `UNIQUE_CONSTRAINT_NAME` Nullable(String) ALIAS unqiue_constraint_name,
-     `MATCH_OPTION` String ALIAS match_option,
-     `UPDATE_RULE` String ALIAS update_rule,
-     `DELETE_RULE` String ALIAS delete_rule
-     `TABLE_NAME` String ALIAS table_name,
-     `REFERENCED_TABLE_NAME` String ALIAS referenced_table_name
+     `CONSTRAINT_CATALOG` String,
+     `CONSTRAINT_SCHEMA` String,
+     `CONSTRAINT_NAME` Nullable(String),
+     `UNIQUE_CONSTRAINT_CATALOG` String,
+     `UNIQUE_CONSTRAINT_SCHEMA` String,
+     `UNIQUE_CONSTRAINT_NAME` Nullable(String),
+     `MATCH_OPTION` String,
+     `UPDATE_RULE` String,
+     `DELETE_RULE` String,
+     `TABLE_NAME` String,
+     `REFERENCED_TABLE_NAME` String
 ) AS
-SELECT ''   AS `constraint_catalog`,
-       NULL AS `constraint_name`,
-       ''   AS `constraint_schema`,
-       ''   AS `unique_constraint_catalog`,
-       NULL AS `unique_constraint_name`,
-       ''   AS `unique_constraint_schema`,
-       ''   AS `match_option`,
-       ''   AS `update_rule`,
-       ''   AS `delete_rule`
-       ''   AS `table_name`,
-       ''   AS `referenced_table_name`
+SELECT 
+    ''                        AS constraint_catalog,
+    NULL                      AS constraint_name,
+    ''                        AS constraint_schema,
+    ''                        AS unique_constraint_catalog,
+    NULL                      AS unique_constraint_name,
+    ''                        AS unique_constraint_schema,
+    ''                        AS match_option,
+    ''                        AS update_rule,
+    ''                        AS delete_rule,
+    ''                        AS table_name,
+    ''                        AS referenced_table_name,
+    constraint_catalog        AS CONSTRAINT_CATALOG,
+    constraint_name           AS CONSTRAINT_NAME,
+    constraint_schema         AS CONSTRAINT_SCHEMA,
+    unique_constraint_catalog AS UNIQUE_CONSTRAINT_CATALOG,
+    unique_constraint_name    AS UNIQUE_CONSTRAINT_NAME,
+    unique_constraint_schema  AS UNIQUE_CONSTRAINT_SCHEMA,
+    match_option              AS MATCH_OPTION,
+    update_rule               AS UPDATE_RULE,
+    delete_rule               AS DELETE_RULE,
+    table_name                AS TABLE_NAME,
+    referenced_table_name     AS REFERENCED_TABLE_NAME
 WHERE false; -- make sure this view is always empty
diff --git a/src/Storages/System/InformationSchema/schemata.sql b/src/Storages/System/InformationSchema/schemata.sql
index 9686fcbf4fa..887a27537e9 100644
--- a/src/Storages/System/InformationSchema/schemata.sql
+++ b/src/Storages/System/InformationSchema/schemata.sql
@@ -7,20 +7,27 @@ ATTACH VIEW schemata
     `default_character_set_schema` Nullable(String),
     `default_character_set_name` Nullable(String),
     `sql_path` Nullable(String),
-    `CATALOG_NAME` String ALIAS catalog_name,
-    `SCHEMA_NAME` String ALIAS schema_name,
-    `SCHEMA_OWNER` String ALIAS schema_owner,
-    `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String) ALIAS default_character_set_catalog,
-    `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String) ALIAS default_character_set_schema,
-    `DEFAULT_CHARACTER_SET_NAME` Nullable(String) ALIAS default_character_set_name,
-    `SQL_PATH` Nullable(String) ALIAS sql_path
+    `CATALOG_NAME` String,
+    `SCHEMA_NAME` String,
+    `SCHEMA_OWNER` String,
+    `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String),
+    `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String),
+    `DEFAULT_CHARACTER_SET_NAME` Nullable(String),
+    `SQL_PATH` Nullable(String)
 ) AS
 SELECT
-    name AS catalog_name,
-    name AS schema_name,
-    'default' AS schema_owner,
-    NULL AS default_character_set_catalog,
-    NULL AS default_character_set_schema,
-    NULL AS default_character_set_name,
-    NULL AS sql_path
+    name                          AS catalog_name,
+    name                          AS schema_name,
+    'default'                     AS schema_owner,
+    NULL                          AS default_character_set_catalog,
+    NULL                          AS default_character_set_schema,
+    NULL                          AS default_character_set_name,
+    NULL                          AS sql_path,
+    catalog_name                  AS CATALOG_NAME,
+    schema_name                   AS SCHEMA_NAME,
+    schema_owner                  AS SCHEMA_OWNER,
+    default_character_set_catalog AS DEFAULT_CHARACTER_SET_CATALOG,
+    default_character_set_schema  AS DEFAULT_CHARACTER_SET_SCHEMA,
+    default_character_set_name    AS DEFAULT_CHARACTER_SET_NAME,
+    sql_path                      AS SQL_PATH
 FROM system.databases
diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql
index d02963b0675..becd609f94c 100644
--- a/src/Storages/System/InformationSchema/tables.sql
+++ b/src/Storages/System/InformationSchema/tables.sql
@@ -6,23 +6,29 @@ ATTACH VIEW tables
     `table_type` String,
     `table_collation` Nullable(String),
     `table_comment` Nullable(String),
-    `TABLE_CATALOG` String ALIAS table_catalog,
-    `TABLE_SCHEMA` String ALIAS table_schema,
-    `TABLE_NAME` String ALIAS table_name,
-    `TABLE_TYPE` String ALIAS table_type,
-    `TABLE_COLLATION` Nullable(String) ALIAS table_collation,
-    `TABLE_COMMENT` Nullable(String) ALIAS table_comment
+    `TABLE_CATALOG` String,
+    `TABLE_SCHEMA` String,
+    `TABLE_NAME` String,
+    `TABLE_TYPE` String,
+    `TABLE_COLLATION` Nullable(String),
+    `TABLE_COMMENT` Nullable(String)
 ) AS
 SELECT
-    database  AS table_catalog,
-    database  AS table_schema,
-    name      AS table_name,
+    database             AS table_catalog,
+    database             AS table_schema,
+    name                 AS table_name,
     multiIf(is_temporary,          'LOCAL TEMPORARY', 
             engine LIKE '%View',   'VIEW', 
             engine LIKE 'System%', 'SYSTEM VIEW', 
             has_own_data = 0,      'FOREIGN TABLE', 
             'BASE TABLE'
-            ) AS table_type,
-    'utf8mb4' AS table_collation,
-    comment   AS table_comment
+            )            AS table_type,
+    'utf8mb4_0900_ai_ci' AS table_collation,
+    comment              AS table_comment,
+    table_catalog        AS TABLE_CATALOG,
+    table_schema         AS TABLE_SCHEMA,
+    table_name           AS TABLE_NAME,
+    table_type           AS TABLE_TYPE,
+    table_collation      AS TABLE_COLLATION,
+    table_comment        AS TABLE_COMMENT
 FROM system.tables
diff --git a/src/Storages/System/InformationSchema/views.sql b/src/Storages/System/InformationSchema/views.sql
index c5ecebfceac..7c6f65e120b 100644
--- a/src/Storages/System/InformationSchema/views.sql
+++ b/src/Storages/System/InformationSchema/views.sql
@@ -10,16 +10,16 @@ ATTACH VIEW views
     `is_trigger_updatable` Enum8('NO' = 0, 'YES' = 1),
     `is_trigger_deletable` Enum8('NO' = 0, 'YES' = 1),
     `is_trigger_insertable_into` Enum8('NO' = 0, 'YES' = 1),
-    `TABLE_CATALOG` String ALIAS table_catalog,
-    `TABLE_SCHEMA` String ALIAS table_schema,
-    `TABLE_NAME` String ALIAS table_name,
-    `VIEW_DEFINITION` String ALIAS view_definition,
-    `CHECK_OPTION` String ALIAS check_option,
-    `IS_UPDATABLE` Enum8('NO' = 0, 'YES' = 1) ALIAS is_updatable,
-    `IS_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1) ALIAS is_insertable_into,
-    `IS_TRIGGER_UPDATABLE` Enum8('NO' = 0, 'YES' = 1) ALIAS is_trigger_updatable,
-    `IS_TRIGGER_DELETABLE` Enum8('NO' = 0, 'YES' = 1) ALIAS is_trigger_deletable,
-    `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1) ALIAS is_trigger_insertable_into
+    `TABLE_CATALOG` String,
+    `TABLE_SCHEMA` String,
+    `TABLE_NAME` String,
+    `VIEW_DEFINITION` String,
+    `CHECK_OPTION` String,
+    `IS_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
+    `IS_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1),
+    `IS_TRIGGER_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
+    `IS_TRIGGER_DELETABLE` Enum8('NO' = 0, 'YES' = 1),
+    `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1)
 ) AS
 SELECT
     database AS table_catalog,
@@ -31,6 +31,16 @@ SELECT
     engine = 'MaterializedView' AS is_insertable_into,
     0 AS is_trigger_updatable,
     0 AS is_trigger_deletable,
-    0 AS is_trigger_insertable_into
+    0 AS is_trigger_insertable_into,
+    table_catalog AS TABLE_CATALOG,
+    table_schema AS TABLE_SCHEMA,
+    table_name AS TABLE_NAME,
+    view_definition AS VIEW_DEFINITION,
+    check_option AS CHECK_OPTION,
+    is_updatable AS IS_UPDATABLE,
+    is_insertable_into AS IS_INSERTABLE_INTO,
+    is_trigger_updatable AS IS_TRIGGER_UPDATABLE,
+    is_trigger_deletable AS IS_TRIGGER_DELETABLE,
+    is_trigger_insertable_into AS IS_TRIGGER_INSERTABLE_INTO
 FROM system.tables
 WHERE engine LIKE '%View'
diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference
index 32ad3f16abc..3cfec968d0c 100644
--- a/tests/queries/0_stateless/01161_information_schema.reference
+++ b/tests/queries/0_stateless/01161_information_schema.reference
@@ -1,37 +1,57 @@
 COLUMNS
+KEY_COLUMN_USAGE
+REFERENTIAL_CONSTRAINTS
 SCHEMATA
 TABLES
 VIEWS
 columns
+key_column_usage
+referential_constraints
 schemata
 tables
 views
 COLUMNS
+KEY_COLUMN_USAGE
+REFERENTIAL_CONSTRAINTS
 SCHEMATA
 TABLES
 VIEWS
 columns
+key_column_usage
+referential_constraints
 schemata
 tables
 views
 INFORMATION_SCHEMA	INFORMATION_SCHEMA	default	\N	\N	\N	\N
 information_schema	information_schema	default	\N	\N	\N	\N
-default	default	mv	VIEW
-default	default	t	FOREIGN TABLE
-default	default	v	VIEW
-		tmp	LOCAL TEMPORARY
+default	default	kcu	BASE TABLE	utf8mb4_0900_ai_ci	
+default	default	kcu2	BASE TABLE	utf8mb4_0900_ai_ci	
+default	default	mv	VIEW	utf8mb4_0900_ai_ci	
+default	default	t	FOREIGN TABLE	utf8mb4_0900_ai_ci	
+default	default	v	VIEW	utf8mb4_0900_ai_ci	
+		tmp	LOCAL TEMPORARY	utf8mb4_0900_ai_ci	
 default	default	mv	SELECT * FROM system.one	NONE	NO	YES	NO	NO	NO
 default	default	v	SELECT n, f FROM default.t	NONE	NO	NO	NO	NO	NO
-default	default	mv	default	mv	dummy	1		0	UInt8	\N	\N	8	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt8
-default	default	t	default	t	n	1		0	UInt64	\N	\N	64	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt64
-default	default	t	default	t	f	2		0	Float32	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float32
-default	default	t	default	t	s	3		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
-default	default	t	default	t	fs	4		0	FixedString(42)	42	42	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		FixedString(42)
-default	default	t	default	t	d	5		0	Decimal(9, 6)	\N	\N	9	10	6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Decimal(9, 6)
-default	default	v	default	v	n	1		1	Nullable(Int32)	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Nullable(Int32)
-default	default	v	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float64
-		tmp		tmp	d	1		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
-		tmp		tmp	dt	2		0	DateTime	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime
-		tmp		tmp	dtms	3		0	DateTime64(3)	\N	\N	\N	\N	\N	3	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime64(3)
+default	default	kcu	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
+default	default	kcu	s	2		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
+default	default	kcu2	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
+default	default	kcu2	d	2		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
+default	default	kcu2	u	3		0	UUID	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UUID
+default	default	mv	dummy	1		0	UInt8	\N	\N	8	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt8
+default	default	t	n	1		0	UInt64	\N	\N	64	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt64
+default	default	t	f	2		0	Float32	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float32
+default	default	t	s	3		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
+default	default	t	fs	4		0	FixedString(42)	42	42	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		FixedString(42)
+default	default	t	d	5		0	Decimal(9, 6)	\N	\N	9	10	6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Decimal(9, 6)
+default	default	v	n	1		1	Nullable(Int32)	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Nullable(Int32)
+default	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float64
+		tmp	d	1		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
+		tmp	dt	2		0	DateTime	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime
+		tmp	dtms	3		0	DateTime64(3)	\N	\N	\N	\N	\N	3	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime64(3)
 1
 1
+def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql
index 68a3b011ced..d2b7af9ed29 100644
--- a/tests/queries/0_stateless/01161_information_schema.sql
+++ b/tests/queries/0_stateless/01161_information_schema.sql
@@ -5,20 +5,82 @@ DROP TABLE IF EXISTS t;
 DROP VIEW IF EXISTS v;
 DROP VIEW IF EXISTS mv;
 DROP TABLE IF EXISTS tmp;
+DROP TABLE IF EXISTS kcu;
+DROP TABLE IF EXISTS kcu2;
 
 CREATE TABLE t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) ENGINE=Memory;
 CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t;
 CREATE MATERIALIZED VIEW mv ENGINE=Null AS SELECT * FROM system.one;
 CREATE TEMPORARY TABLE tmp (d Date, dt DateTime, dtms DateTime64(3));
-
+CREATE TABLE kcu (i UInt32, s String) ENGINE MergeTree ORDER BY i;
+CREATE TABLE kcu2 (i UInt32, d Date, u UUID) ENGINE MergeTree ORDER BY (u, d);
 
 -- FIXME #28687
-SELECT * FROM information_schema.schemata WHERE schema_name ilike 'information_schema';
+SELECT catalog_name,
+       schema_name,
+       schema_owner,
+       default_character_set_catalog,
+       default_character_set_schema,
+       default_character_set_name,
+       sql_path
+FROM information_schema.schemata
+WHERE schema_name ilike 'information_schema';
+
 -- SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
-SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%';
-SELECT * FROM information_schema.views WHERE table_schema=currentDatabase();
+
+SELECT table_catalog, 
+       table_schema, 
+       table_name, 
+       table_type, 
+       table_collation, 
+       table_comment
+FROM INFORMATION_SCHEMA.TABLES
+WHERE (table_schema = currentDatabase() OR table_schema = '')
+  AND table_name NOT LIKE '%inner%';
+
+SELECT table_catalog,
+       table_schema,
+       table_name,
+       view_definition,
+       check_option,
+       is_updatable,
+       is_insertable_into,
+       is_trigger_updatable,
+       is_trigger_deletable,
+       is_trigger_insertable_into
+FROM information_schema.views
+WHERE table_schema = currentDatabase();
+
 -- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
-SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%';
+
+SELECT table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       column_default,
+       is_nullable,
+       data_type,
+       character_maximum_length,
+       character_octet_length,
+       numeric_precision,
+       numeric_precision_radix,
+       numeric_scale,
+       datetime_precision,
+       character_set_catalog,
+       character_set_schema,
+       character_set_name,
+       collation_catalog,
+       collation_schema,
+       collation_name,
+       domain_catalog,
+       domain_schema,
+       domain_name,
+       column_comment,
+       column_type
+FROM INFORMATION_SCHEMA.COLUMNS
+WHERE (table_schema = currentDatabase() OR table_schema = '')
+  AND table_name NOT LIKE '%inner%';
 
 -- mixed upper/lowercase schema and table name:
 SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't';
@@ -26,6 +88,51 @@ SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase
 SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE }
 SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE }
 
+SELECT constraint_catalog,
+       constraint_schema,
+       constraint_name,
+       table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       position_in_unique_constraint,
+       referenced_table_schema,
+       referenced_table_name,
+       referenced_column_name
+FROM information_schema.key_column_usage
+WHERE table_name = 'kcu';
+
+SELECT constraint_catalog,
+       constraint_schema,
+       constraint_name,
+       table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       position_in_unique_constraint,
+       referenced_table_schema,
+       referenced_table_name,
+       referenced_column_name
+FROM information_schema.key_column_usage
+WHERE table_name = 'kcu2';
+
+SELECT constraint_catalog,
+       constraint_name,
+       constraint_schema,
+       unique_constraint_catalog,
+       unique_constraint_name,
+       unique_constraint_schema,
+       match_option,
+       update_rule,
+       delete_rule,
+       table_name,
+       referenced_table_name
+FROM information_schema.referential_constraints;
+
 drop view mv;
 drop view v;
 drop table t;
+drop table kcu;
+drop table kcu2;

From 7271cfd18704e2ab89ec9d8f3528e67794b6c7b0 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Tue, 19 Sep 2023 21:41:02 +0000
Subject: [PATCH 141/243] Prevent parquet schema inference reading the first 1
 MB of the file unnecessarily

---
 src/Formats/ReadSchemaUtils.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp
index 3158798fdca..b185007eda7 100644
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@@ -5,6 +5,7 @@
 #include <Storages/IStorage.h>
 #include <Common/assert_cast.h>
 #include <IO/WithFileName.h>
+#include <IO/WithFileSize.h>
 
 
 namespace DB
@@ -86,7 +87,16 @@ try
                 buf = read_buffer_iterator.next();
                 if (!buf)
                     break;
-                is_eof = buf->eof();
+
+                /// We just want to check for eof, but eof() can be pretty expensive.
+                /// So we use getFileSize() when available, which has better worst case.
+                /// (For remote files, typically eof() would read 1 MB from S3, which may be much
+                ///  more than what the schema reader and even data reader will read).
+                auto size = tryGetFileSizeFromReadBuffer(*buf);
+                if (size.has_value())
+                    is_eof = *size == 0;
+                else
+                    is_eof = buf->eof();
             }
             catch (Exception & e)
             {

From c856ec4087bb990c6fe1c22d6b619a6ae8646e31 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Tue, 19 Sep 2023 21:43:44 +0000
Subject: [PATCH 142/243] Prevent ParquetMetadata reading 40 MB from each file
 unnecessarily

---
 .../Formats/Impl/ParquetMetadataInputFormat.cpp   | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
index 043e6d2260c..1f81f5ac201 100644
--- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp
@@ -130,7 +130,7 @@ static std::shared_ptr<parquet::FileMetaData> getFileMetadata(
     const FormatSettings & format_settings,
     std::atomic<int> & is_stopped)
 {
-    auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES);
+    auto arrow_file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true);
     return parquet::ReadMetaData(arrow_file);
 }
 
@@ -495,12 +495,15 @@ NamesAndTypesList ParquetMetadataSchemaReader::readSchema()
 
 void registerInputFormatParquetMetadata(FormatFactory & factory)
 {
-    factory.registerInputFormat(
+    factory.registerRandomAccessInputFormat(
         "ParquetMetadata",
-        [](ReadBuffer &buf,
-           const Block &sample,
-           const RowInputFormatParams &,
-           const FormatSettings & settings)
+        [](ReadBuffer & buf,
+            const Block & sample,
+            const FormatSettings & settings,
+            const ReadSettings &,
+            bool /* is_remote_fs */,
+            size_t /* max_download_threads */,
+            size_t /* max_parsing_threads */)
         {
             return std::make_shared<ParquetMetadataInputFormat>(buf, sample, settings);
         });

From e8cd42945388ecc20192b398da17867955873160 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <yakov@clickhouse.com>
Date: Tue, 19 Sep 2023 22:11:58 +0000
Subject: [PATCH 143/243] add error code INVALID_IDENTIFIER

---
 src/Common/ErrorCodes.cpp                      | 1 +
 src/Interpreters/DatabaseAndTableWithAlias.cpp | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index f23685c37d1..e2ad8741ca3 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -585,6 +585,7 @@
     M(700, USER_SESSION_LIMIT_EXCEEDED)  \
     M(701, CLUSTER_DOESNT_EXIST) \
     M(702, CLIENT_INFO_DOES_NOT_MATCH) \
+    M(703, INVALID_IDENTIFIER) \
     \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
diff --git a/src/Interpreters/DatabaseAndTableWithAlias.cpp b/src/Interpreters/DatabaseAndTableWithAlias.cpp
index 9b6ce4f22d3..cb993611ecb 100644
--- a/src/Interpreters/DatabaseAndTableWithAlias.cpp
+++ b/src/Interpreters/DatabaseAndTableWithAlias.cpp
@@ -16,6 +16,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
+    extern const int INVALID_IDENTIFIER;
 }
 
 DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableIdentifier & identifier, const String & current_database)
@@ -37,7 +38,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident
     else if (identifier.name_parts.size() == 1)
         table = identifier.name_parts[0];
     else
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: invalid identifier");
+        throw Exception(ErrorCodes::INVALID_IDENTIFIER, "Invalid identifier");
 
     if (database.empty())
         database = current_database;
@@ -50,7 +51,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const
     else if (const auto * identifier = node->as<ASTIdentifier>())
         *this = DatabaseAndTableWithAlias(*identifier, current_database);
     else
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: identifier or table identifier expected");
+        throw Exception(ErrorCodes::INVALID_IDENTIFIER, "Identifier or table identifier expected");
 }
 
 DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database)

From e9e5c7a09bdfc36459d7a2ac141914a9357c67f7 Mon Sep 17 00:00:00 2001
From: lgbo-ustc <lgbo.ustc@gmail.com>
Date: Wed, 20 Sep 2023 08:44:30 +0800
Subject: [PATCH 144/243] update

---
 src/Functions/FunctionSQLJSON.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h
index 1786b613f98..0533f3d419a 100644
--- a/src/Functions/FunctionSQLJSON.h
+++ b/src/Functions/FunctionSQLJSON.h
@@ -209,7 +209,7 @@ public:
                 bool added_to_column = false;
                 if (document_ok)
                 {
-                    // Instead of creating a new generator for each row, we can reuse the same one.
+                    /// Instead of creating a new generator for each row, we can reuse the same one.
                     generator_json_path.reinitialize();
                     added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, context);
                 }

From d59db55b142ee5a83c6911a46a821cf788158b0b Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Wed, 20 Sep 2023 10:23:47 +0800
Subject: [PATCH 145/243] add uts about array join

---
 ...02355_control_block_size_in_array_join.reference |  1 +
 .../02355_control_block_size_in_array_join.sql      | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 tests/queries/0_stateless/02355_control_block_size_in_array_join.reference
 create mode 100644 tests/queries/0_stateless/02355_control_block_size_in_array_join.sql

diff --git a/tests/queries/0_stateless/02355_control_block_size_in_array_join.reference b/tests/queries/0_stateless/02355_control_block_size_in_array_join.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02355_control_block_size_in_array_join.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02355_control_block_size_in_array_join.sql b/tests/queries/0_stateless/02355_control_block_size_in_array_join.sql
new file mode 100644
index 00000000000..f094a612696
--- /dev/null
+++ b/tests/queries/0_stateless/02355_control_block_size_in_array_join.sql
@@ -0,0 +1,13 @@
+SET max_block_size = 8192;
+
+SELECT DISTINCT blockSize() <= 8192
+FROM
+(
+    SELECT n
+    FROM
+    (
+        SELECT range(0, rand() % 10) AS x
+        FROM numbers(1000000)
+    )
+    LEFT ARRAY JOIN x AS n
+)

From 34aecc0bf30ff4f6109d593ddc305f45324ba59a Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Tue, 19 Sep 2023 23:05:48 -0400
Subject: [PATCH 146/243] Adjusting `num_streams` by expected work in StorageS3

---
 src/Storages/StorageS3.cpp | 74 ++++++++++++++++++++++++++++++++++++--
 src/Storages/StorageS3.h   | 14 ++++++--
 2 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 341d8b3f768..e99be7a1204 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -180,6 +180,13 @@ public:
         return nextAssumeLocked();
     }
 
+    size_t objectsCount()
+    {
+        assert(outcome_future.valid());
+        first_outcome = outcome_future.get();
+        return first_outcome->GetResult().GetContents().size();
+    }
+
     ~Impl()
     {
         list_objects_pool.wait();
@@ -225,8 +232,17 @@ private:
     {
         buffer.clear();
 
-        assert(outcome_future.valid());
-        auto outcome = outcome_future.get();
+        ListObjectsOutcome outcome;
+        if (unlikely(first_outcome))
+        {
+            outcome = std::move(*first_outcome);
+            first_outcome = std::nullopt;
+        }
+        else
+        {
+            assert(outcome_future.valid());
+            outcome = outcome_future.get();
+        }
 
         if (!outcome.IsSuccess())
         {
@@ -343,6 +359,7 @@ private:
     ThreadPool list_objects_pool;
     ThreadPoolCallbackRunner<ListObjectsOutcome> list_objects_scheduler;
     std::future<ListObjectsOutcome> outcome_future;
+    std::optional<ListObjectsOutcome> first_outcome;  /// the result will be set by `estimatedKeysCount`
     std::function<void(FileProgress)> file_progress_callback;
 };
 
@@ -364,6 +381,11 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next()
     return pimpl->next();
 }
 
+size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount()
+{
+    return pimpl->objectsCount();
+}
+
 class StorageS3Source::KeysIterator::Impl : WithContext
 {
 public:
@@ -425,6 +447,11 @@ public:
         return {key, info};
     }
 
+    size_t objectsCount()
+    {
+        return keys.size();
+    }
+
 private:
     Strings keys;
     std::atomic_size_t index = 0;
@@ -459,6 +486,43 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next()
     return pimpl->next();
 }
 
+size_t StorageS3Source::KeysIterator::estimatedKeysCount()
+{
+    return pimpl->objectsCount();
+}
+
+StorageS3Source::ReadTaskIterator::ReadTaskIterator(
+    const DB::ReadTaskCallback & callback_,
+    const size_t max_threads_count)
+    : callback(callback_)
+{
+    ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, max_threads_count);
+    auto pool_scheduler = threadPoolCallbackRunner<String>(pool, "ReadTaskIteratorPrefetch");
+
+    std::vector<std::future<String>> keys;
+    for (size_t i = 0; i < max_threads_count; ++i)
+        keys.push_back(pool_scheduler([this] { return callback(); }, Priority{}));
+
+    pool.wait();
+    buffer.reserve(max_threads_count);
+    for (auto & key_future : keys)
+        buffer.emplace_back(key_future.get(), std::nullopt);
+}
+
+StorageS3Source::KeyWithInfo StorageS3Source::ReadTaskIterator::next()
+{
+    size_t current_index = index.fetch_add(1, std::memory_order_relaxed);
+    if (current_index >= buffer.size())
+        return {callback(), {}};
+
+    return buffer[current_index];
+}
+
+size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount()
+{
+    return buffer.size();
+}
+
 StorageS3Source::StorageS3Source(
     const ReadFromFormatInfo & info,
     const String & format_,
@@ -965,7 +1029,7 @@ std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
 {
     if (distributed_processing)
     {
-        return std::make_shared<StorageS3Source::ReadTaskIterator>(local_context->getReadTaskCallback());
+        return std::make_shared<StorageS3Source::ReadTaskIterator>(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads);
     }
     else if (configuration.withGlobs())
     {
@@ -1017,6 +1081,9 @@ Pipe StorageS3::read(
     std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
         query_configuration, distributed_processing, local_context, query_info.query, virtual_columns, nullptr, local_context->getFileProgressCallback());
 
+    size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
+    num_streams = std::min(num_streams, estimated_keys_count);
+
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
         && local_context->getSettingsRef().optimize_count_from_files;
@@ -1024,6 +1091,7 @@ Pipe StorageS3::read(
     const size_t max_threads = local_context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
 
+    pipes.reserve(num_streams);
     for (size_t i = 0; i < num_streams; ++i)
     {
         pipes.emplace_back(std::make_shared<StorageS3Source>(
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index ee03b9f18c2..f0315244088 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -60,6 +60,10 @@ public:
         virtual ~IIterator() = default;
         virtual KeyWithInfo next() = 0;
 
+        /// Estimates how many streams we need to process all files.
+        /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys.
+        virtual size_t estimatedKeysCount() = 0;
+
         KeyWithInfo operator ()() { return next(); }
     };
 
@@ -77,6 +81,7 @@ public:
             std::function<void(FileProgress)> progress_callback_ = {});
 
         KeyWithInfo next() override;
+        size_t estimatedKeysCount() override;
 
     private:
         class Impl;
@@ -100,6 +105,7 @@ public:
             std::function<void(FileProgress)> progress_callback_ = {});
 
         KeyWithInfo next() override;
+        size_t estimatedKeysCount() override;
 
     private:
         class Impl;
@@ -110,11 +116,15 @@ public:
     class ReadTaskIterator : public IIterator
     {
     public:
-        explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {}
+        explicit ReadTaskIterator(const ReadTaskCallback & callback_, const size_t max_threads_count);
 
-        KeyWithInfo next() override { return {callback(), {}}; }
+        KeyWithInfo next() override;
+        size_t estimatedKeysCount() override;
 
     private:
+        KeysWithInfo buffer;
+        std::atomic_size_t index = 0;
+
         ReadTaskCallback callback;
     };
 

From 56dca144cc4a35c953113ab0aa7f0bf3448cbf73 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 20 Sep 2023 07:09:31 +0200
Subject: [PATCH 147/243] Fix
 test_backup_restore_on_cluster/test.py::test_stop_other_host_during_backup
 flakiness

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 tests/integration/test_backup_restore_on_cluster/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py
index dfce2f15413..20f538cca58 100644
--- a/tests/integration/test_backup_restore_on_cluster/test.py
+++ b/tests/integration/test_backup_restore_on_cluster/test.py
@@ -1096,6 +1096,7 @@ def test_stop_other_host_during_backup(kill):
     if status == "BACKUP_CREATED":
         node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC")
         node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}")
+        node1.query("SYSTEM SYNC REPLICA tbl")
         assert node1.query("SELECT * FROM tbl ORDER BY x") == TSV([3, 5])
     elif status == "BACKUP_FAILED":
         assert not os.path.exists(

From 696ab745aa44da168f98d7c024f325fc08c3be34 Mon Sep 17 00:00:00 2001
From: Sergei Trifonov <sergei@clickhouse.com>
Date: Wed, 20 Sep 2023 07:51:27 +0200
Subject: [PATCH 148/243] Doc. Update query-complexity.md

---
 docs/en/operations/settings/query-complexity.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md
index b65ecdcb6ab..15f39b53e07 100644
--- a/docs/en/operations/settings/query-complexity.md
+++ b/docs/en/operations/settings/query-complexity.md
@@ -157,7 +157,7 @@ At this time, it is not checked for one of the sorting stages, or when merging a
 The `max_execution_time` parameter can be a bit tricky to understand. 
 It operates based on interpolation relative to the current query execution speed (this behaviour is controlled by [timeout_before_checking_execution_speed](#timeout-before-checking-execution-speed)). 
 ClickHouse will interrupt a query if the projected execution time exceeds the specified `max_execution_time`.
-By default, the timeout_before_checking_execution_speed is set to 1 second. This means that after just one second of query execution, ClickHouse will begin estimating the total execution time. 
+By default, the timeout_before_checking_execution_speed is set to 10 seconds. This means that after 10 seconds of query execution, ClickHouse will begin estimating the total execution time. 
 If, for example, `max_execution_time` is set to 3600 seconds (1 hour), ClickHouse will terminate the query if the estimated time exceeds this 3600-second limit.
 If you set `timeout_before_checking_execution_speed `to 0, ClickHouse will use clock time as the basis for `max_execution_time`.
 

From 074ee40e3b80010a6f77765423ff153be79b10a2 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 20 Sep 2023 06:55:39 +0000
Subject: [PATCH 149/243] avoid excessive calls to getifaddrs

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Common/isLocalAddress.cpp | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index 7569c6fc14e..8089a713eb8 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -2,6 +2,9 @@
 
 #include <ifaddrs.h>
 #include <cstring>
+#include <memory>
+#include <mutex>
+#include <shared_mutex>
 #include <optional>
 #include <base/types.h>
 #include <Common/Exception.h>
@@ -74,6 +77,29 @@ struct NetworkInterfaces
     {
         freeifaddrs(ifaddr);
     }
+
+    static NetworkInterfaces & instance()
+    {
+        static constexpr int NET_INTERFACE_VALID_PERIOD_SECONDS = 30;
+        static std::unique_ptr<NetworkInterfaces> nf = std::make_unique<NetworkInterfaces>();
+        static time_t last_updated_time = time(nullptr);
+        static std::shared_mutex nf_mtx;
+
+        time_t now = time(nullptr);
+
+        if (now - last_updated_time > NET_INTERFACE_VALID_PERIOD_SECONDS)
+        {
+            std::unique_lock lock(nf_mtx);
+            nf = std::make_unique<NetworkInterfaces>();
+            last_updated_time = now;
+            return *nf;
+        }
+        else
+        {
+            std::shared_lock lock(nf_mtx);
+            return *nf;
+        }
+    }
 };
 
 }
@@ -111,8 +137,7 @@ bool isLocalAddress(const Poco::Net::IPAddress & address)
         }
     }
 
-    NetworkInterfaces interfaces;
-    return interfaces.hasAddress(address);
+    return NetworkInterfaces::instance().hasAddress(address);
 }
 
 

From b1bddae5ce7c653ade6405321bf9f9c4cd991f23 Mon Sep 17 00:00:00 2001
From: Vitaly Baranov <vitlibar@clickhouse.com>
Date: Wed, 20 Sep 2023 10:26:29 +0200
Subject: [PATCH 150/243] Rewrite the test to check the query plan too.

---
 ...optimize_read_in_order_from_view.reference |  4 +-
 .../02877_optimize_read_in_order_from_view.sh | 40 +++++++++++++++++++
 ...02877_optimize_read_in_order_from_view.sql | 21 ----------
 3 files changed, 43 insertions(+), 22 deletions(-)
 create mode 100755 tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh
 delete mode 100644 tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql

diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
index 05893173f2b..0f7d306c7b5 100644
--- a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
+++ b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.reference
@@ -18,4 +18,6 @@
 283
 282
 281
-read_rows=ok
+read_rows:ok
+ReadFromMergeTree (default.table1)
+ReadType: InReverseOrder
diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh
new file mode 100755
index 00000000000..a3a906a9891
--- /dev/null
+++ b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. "$CURDIR"/../shell_config.sh
+set -eo pipefail
+
+$CLICKHOUSE_CLIENT --multiquery <<EOF
+DROP TABLE IF EXISTS view1;
+DROP TABLE IF EXISTS table1;
+CREATE TABLE table1 (number UInt64) ENGINE=MergeTree ORDER BY number SETTINGS index_granularity=1;
+INSERT INTO table1 SELECT number FROM numbers(1, 300);
+CREATE VIEW view1 AS SELECT number FROM table1;
+EOF
+
+# The following SELECT is expected to read 20 rows. In fact it may decide to read more than 20 rows, but not too many anyway.
+# So we'll check that the number of read rows is less than 40.
+query="SELECT * FROM (SELECT * FROM view1) ORDER BY number DESC LIMIT 20"
+
+query_id=${CLICKHOUSE_DATABASE}_optimize_read_in_order_from_view_$RANDOM$RANDOM
+
+$CLICKHOUSE_CLIENT -q "$query" --query_id="$query_id" --log_queries=1 --optimize_read_in_order=1
+
+$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
+
+read_rows=$($CLICKHOUSE_CLIENT -q "SELECT read_rows FROM system.query_log WHERE current_database = currentDatabase() AND query_id='${query_id}' AND type='QueryFinish'")
+
+if [ -z "$read_rows" ]; then
+    echo "read_rows:not found"
+elif (( "$read_rows" > 40 )); then
+    echo "read_rows:$read_rows"
+else
+    echo "read_rows:ok"
+fi
+
+query_plan=$($CLICKHOUSE_CLIENT -q "EXPLAIN actions=1 $query" --optimize_read_in_order=1)
+
+echo "$query_plan" | grep -A 1 "ReadFromMergeTree" | sed 's/^[ \t]*//'
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE view1"
+$CLICKHOUSE_CLIENT -q "DROP TABLE table1"
diff --git a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql b/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql
deleted file mode 100644
index 3397681d0d9..00000000000
--- a/tests/queries/0_stateless/02877_optimize_read_in_order_from_view.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-SET optimize_read_in_order=1;
-
-DROP TABLE IF EXISTS view1;
-DROP TABLE IF EXISTS table1;
-
-CREATE TABLE table1 (number UInt64) ENGINE=MergeTree ORDER BY number SETTINGS index_granularity=1;
-INSERT INTO table1 SELECT number FROM numbers(1, 300);
-
-CREATE VIEW view1 AS SELECT number FROM table1;
-
--- The following SELECT is expected to read 20 rows. In fact it may decide to read more than 20 rows, but not too many anyway.
--- So we'll check that the number of read rows is less than 40.
-
-SELECT /* test 02877, query 1 */ * FROM (SELECT * FROM view1) ORDER BY number DESC LIMIT 20 SETTINGS log_queries=1;
-
-SYSTEM FLUSH LOGS;
-
-SELECT concat('read_rows=', if(read_rows<40, 'ok', toString(read_rows))) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%test 02877, query 1%' AND type='QueryFinish';
-
-DROP TABLE view1;
-DROP TABLE table1;

From 5ace2a15db1671eddd32c7305e4ebe50c40cab93 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 20 Sep 2023 16:46:32 +0800
Subject: [PATCH 151/243] Update src/Common/isLocalAddress.cpp

Co-authored-by: Igor Nikonov <954088+devcrafter@users.noreply.github.com>
---
 src/Common/isLocalAddress.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index 8089a713eb8..54a01dc4126 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -78,7 +78,7 @@ struct NetworkInterfaces
         freeifaddrs(ifaddr);
     }
 
-    static NetworkInterfaces & instance()
+    static const NetworkInterfaces & instance()
     {
         static constexpr int NET_INTERFACE_VALID_PERIOD_SECONDS = 30;
         static std::unique_ptr<NetworkInterfaces> nf = std::make_unique<NetworkInterfaces>();

From a175a7e0fcd696e08f3262582b5dbb7074259668 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 20 Sep 2023 09:04:35 +0000
Subject: [PATCH 152/243] Use error name instead of error code

---
 .../02494_query_cache_nondeterministic_functions.sql            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
index 3a2e24d6bfe..62e0b099d7a 100644
--- a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
+++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
@@ -4,7 +4,7 @@
 SYSTEM DROP QUERY CACHE;
 
 -- rand() is non-deterministic, the query is rejected by default
-SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError 703 }
+SELECT COUNT(rand(1)) SETTINGS use_query_cache = true; -- { serverError CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS }
 SELECT COUNT(*) FROM system.query_cache;
 
 SELECT '---';

From eb8f9f1e764aa3b773a568f7234a989d55957891 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 20 Sep 2023 09:21:12 +0000
Subject: [PATCH 153/243] Remove config files sizes check

---
 .../internal/platform/data/file_test.go        | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go
index bb6397b82e8..9e305b1a5da 100644
--- a/programs/diagnostics/internal/platform/data/file_test.go
+++ b/programs/diagnostics/internal/platform/data/file_test.go
@@ -132,13 +132,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		configFrame, errs := data.NewConfigFileFrame(path.Join(cwd, "../../../testdata", "configs", "xml"))
 		require.Empty(t, errs)
 		i := 0
-		sizes := map[string]int64{
-			"users.xml":            int64(2017),
-			"default-password.xml": int64(188),
-			"config.xml":           int64(59377),
-			"server-include.xml":   int64(168),
-			"user-include.xml":     int64(559),
-		}
 		var checkedFiles []string
 		for {
 			values, ok, err := configFrame.Next()
@@ -153,8 +146,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 			newPath := path.Join(tmrDir, fileName)
 			err = configFile.Copy(newPath, true)
 			require.FileExists(t, newPath)
-			destInfo, _ := os.Stat(newPath)
-			require.Equal(t, sizes[fileName], destInfo.Size())
 			require.Nil(t, err)
 			bytes, err := ioutil.ReadFile(newPath)
 			require.Nil(t, err)
@@ -186,13 +177,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		configFrame, errs := data.NewConfigFileFrame(path.Join(cwd, "../../../testdata", "configs", "yaml"))
 		require.Empty(t, errs)
 		i := 0
-		sizes := map[string]int64{
-			"users.yaml":            int64(1023),
-			"default-password.yaml": int64(132),
-			"config.yaml":           int64(41633),
-			"server-include.yaml":   int64(21),
-			"user-include.yaml":     int64(120),
-		}
 		var checkedFiles []string
 		for {
 			values, ok, err := configFrame.Next()
@@ -207,8 +191,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 			newPath := path.Join(tmrDir, fileName)
 			err = configFile.Copy(newPath, true)
 			require.FileExists(t, newPath)
-			destInfo, _ := os.Stat(newPath)
-			require.Equal(t, sizes[fileName], destInfo.Size())
 			require.Nil(t, err)
 			bytes, err := ioutil.ReadFile(newPath)
 			require.Nil(t, err)

From 65baf129bf8a081d4041fa73aebb34287fcb098b Mon Sep 17 00:00:00 2001
From: vdimir <vdimir@clickhouse.com>
Date: Tue, 19 Sep 2023 15:34:42 +0000
Subject: [PATCH 154/243] Do not set PR status label

---
 tests/ci/commit_status_helper.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py
index a5fd27efb6b..3d07d81a1fe 100644
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@@ -141,16 +141,6 @@ STATUS_ICON_MAP = defaultdict(
 )
 
 
-def update_pr_status_label(pr: PullRequest, status: str) -> None:
-    new_label = "pr-status-" + STATUS_ICON_MAP[status]
-    for label in pr.get_labels():
-        if label.name == new_label:
-            return
-        if label.name.startswith("pr-status-"):
-            pr.remove_from_labels(label.name)
-    pr.add_to_labels(new_label)
-
-
 def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
     """It adds or updates the comment status to all Pull Requests but for release
     one, so the method does nothing for simple pushes and pull requests with
@@ -190,8 +180,6 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None:
             comment = ic
             break
 
-    update_pr_status_label(pr, get_worst_state(statuses))
-
     if comment is None:
         pr.create_issue_comment(comment_body)
         return

From d9a634eb0f3198658af25f86221a331b05e1cd39 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 8 Sep 2023 16:58:13 +0200
Subject: [PATCH 155/243] Fix filtering parts with indexHint for non analyzer

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
(cherry picked from commit ffa82e9297c1d01f77793cc7b43aa4fb7bbec9c4)
---
 src/Processors/QueryPlan/ReadFromMergeTree.cpp           | 5 ++++-
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp   | 3 +++
 tests/analyzer_tech_debt.txt                             | 1 +
 .../0_stateless/02880_indexHint__partition_id.reference  | 9 +++++++++
 .../0_stateless/02880_indexHint__partition_id.sql        | 9 +++++++++
 5 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.reference
 create mode 100644 tests/queries/0_stateless/02880_indexHint__partition_id.sql

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index a2f2c1e0aac..237a4cc703f 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -1346,7 +1346,10 @@ static void buildIndexes(
     }
 
     /// TODO Support row_policy_filter and additional_filters
-    indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
+    if (settings.allow_experimental_analyzer)
+        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, filter_actions_dag, context);
+    else
+        indexes->part_values = MergeTreeDataSelectExecutor::filterPartsByVirtualColumns(data, parts, query_info.query, context);
 
     indexes->use_skip_indexes = settings.use_skip_indexes;
     bool final = query_info.isFinal();
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 3c2b09b6f3b..d7c6c370c18 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -815,6 +815,9 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
     ASTPtr expression_ast;
     auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
 
+    if (virtual_columns_block.rows() == 0)
+        return {};
+
     // Generate valid expressions for filtering
     VirtualColumnUtils::prepareFilterBlockWithQuery(query, context, virtual_columns_block, expression_ast);
 
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt
index 4419190e12c..652ab0b99de 100644
--- a/tests/analyzer_tech_debt.txt
+++ b/tests/analyzer_tech_debt.txt
@@ -54,6 +54,7 @@
 01710_projection_additional_filters
 01721_join_implicit_cast_long
 01739_index_hint
+02880_indexHint__partition_id
 01747_join_view_filter_dictionary
 01748_partition_id_pruning
 01756_optimize_skip_unused_shards_rewrite_in
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.reference b/tests/queries/0_stateless/02880_indexHint__partition_id.reference
new file mode 100644
index 00000000000..365e7b676c7
--- /dev/null
+++ b/tests/queries/0_stateless/02880_indexHint__partition_id.reference
@@ -0,0 +1,9 @@
+-- { echoOn }
+select * from data prewhere indexHint(_partition_id = '1');
+1
+select count() from data prewhere indexHint(_partition_id = '1');
+1
+select * from data where indexHint(_partition_id = '1');
+1
+select count() from data where indexHint(_partition_id = '1');
+1
diff --git a/tests/queries/0_stateless/02880_indexHint__partition_id.sql b/tests/queries/0_stateless/02880_indexHint__partition_id.sql
new file mode 100644
index 00000000000..d15b3f4ccea
--- /dev/null
+++ b/tests/queries/0_stateless/02880_indexHint__partition_id.sql
@@ -0,0 +1,9 @@
+drop table if exists data;
+create table data (part Int) engine=MergeTree() order by tuple() partition by part;
+insert into data values (1)(2);
+
+-- { echoOn }
+select * from data prewhere indexHint(_partition_id = '1');
+select count() from data prewhere indexHint(_partition_id = '1');
+select * from data where indexHint(_partition_id = '1');
+select count() from data where indexHint(_partition_id = '1');

From e6a3f3ed017a1f6ed82d00e210ed173c3da69242 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 20 Sep 2023 09:31:03 +0000
Subject: [PATCH 156/243] get rid of unique pointer

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Common/isLocalAddress.cpp | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index 54a01dc4126..e9fefb0c319 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -1,12 +1,14 @@
 #include <Common/isLocalAddress.h>
 
 #include <ifaddrs.h>
+#include <chrono>
 #include <cstring>
 #include <memory>
 #include <mutex>
 #include <shared_mutex>
 #include <optional>
 #include <base/types.h>
+#include <boost/core/noncopyable.hpp>
 #include <Common/Exception.h>
 #include <Poco/Net/IPAddress.h>
 #include <Poco/Net/SocketAddress.h>
@@ -23,7 +25,7 @@ namespace ErrorCodes
 namespace
 {
 
-struct NetworkInterfaces
+struct NetworkInterfaces : public boost::noncopyable
 {
     ifaddrs * ifaddr;
     NetworkInterfaces()
@@ -34,6 +36,12 @@ struct NetworkInterfaces
         }
     }
 
+    void swap(NetworkInterfaces && other)
+    {
+        ifaddr = other.ifaddr;
+        other.ifaddr = nullptr;
+    }
+
     bool hasAddress(const Poco::Net::IPAddress & address) const
     {
         ifaddrs * iface;
@@ -80,24 +88,24 @@ struct NetworkInterfaces
 
     static const NetworkInterfaces & instance()
     {
-        static constexpr int NET_INTERFACE_VALID_PERIOD_SECONDS = 30;
-        static std::unique_ptr<NetworkInterfaces> nf = std::make_unique<NetworkInterfaces>();
-        static time_t last_updated_time = time(nullptr);
+        static constexpr int NET_INTERFACE_VALID_PERIOD_MS = 30000;
+        static NetworkInterfaces nf;
+        static auto last_updated_time = std::chrono::steady_clock::now();
         static std::shared_mutex nf_mtx;
 
-        time_t now = time(nullptr);
+        auto now = std::chrono::steady_clock::now();
 
-        if (now - last_updated_time > NET_INTERFACE_VALID_PERIOD_SECONDS)
+        if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_updated_time).count() > NET_INTERFACE_VALID_PERIOD_MS)
         {
             std::unique_lock lock(nf_mtx);
-            nf = std::make_unique<NetworkInterfaces>();
+            nf.swap(NetworkInterfaces());
             last_updated_time = now;
-            return *nf;
+            return nf;
         }
         else
         {
             std::shared_lock lock(nf_mtx);
-            return *nf;
+            return nf;
         }
     }
 };

From d8adf05de25a61555ded0cdbec3dc2388d77909e Mon Sep 17 00:00:00 2001
From: SmitaRKulkarni <64093672+SmitaRKulkarni@users.noreply.github.com>
Date: Wed, 20 Sep 2023 11:31:12 +0200
Subject: [PATCH 157/243] Added a new column _block_number (#47532)

Added a new virtual column _block_number which is persisted on merges when allow_experimental_block_number_column is enabled
---
 .../mergetree-family/mergetree.md             |  1 +
 .../settings/merge-tree-settings.md           |  6 +++
 src/Interpreters/InterpreterCreateQuery.cpp   |  8 ++++
 src/Interpreters/MutationsInterpreter.cpp     | 26 +++++++++++-
 src/Interpreters/inplaceBlockConversions.cpp  | 14 +++++--
 src/Interpreters/inplaceBlockConversions.h    |  2 +-
 .../Algorithms/SummingSortedAlgorithm.cpp     |  7 ++++
 src/Processors/Transforms/TTLTransform.cpp    |  3 +-
 src/Storages/AlterCommands.cpp                | 11 ++++-
 src/Storages/BlockNumberColumn.cpp            | 23 +++++++++++
 src/Storages/BlockNumberColumn.h              | 16 ++++++++
 src/Storages/ColumnsDescription.cpp           |  6 +++
 src/Storages/MergeTree/IMergeTreeDataPart.h   |  4 --
 src/Storages/MergeTree/IMergeTreeReader.cpp   |  4 +-
 src/Storages/MergeTree/IMergeTreeReader.h     |  2 +-
 src/Storages/MergeTree/MergeTask.cpp          | 23 ++++++++++-
 src/Storages/MergeTree/MergeTask.h            |  7 ++++
 src/Storages/MergeTree/MergeTreeData.cpp      |  4 +-
 .../MergeTree/MergeTreeDataPartCompact.cpp    |  7 ++++
 .../MergeTreeDataPartWriterCompact.cpp        | 12 +++++-
 .../MergeTree/MergeTreeDataPartWriterWide.cpp | 12 +++++-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |  6 ++-
 .../MergeTree/MergeTreeSelectProcessor.cpp    | 27 ++++++++++--
 .../MergeTree/MergeTreeSequentialSource.cpp   |  2 +-
 src/Storages/MergeTree/MergeTreeSettings.h    |  3 +-
 src/Storages/StorageDistributed.cpp           |  3 ++
 src/Storages/StorageLog.cpp                   | 12 +++++-
 src/Storages/StorageSnapshot.cpp              |  2 +
 .../02668_column_block_number.reference       | 41 +++++++++++++++++++
 .../0_stateless/02668_column_block_number.sql | 32 +++++++++++++++
 ...lumn_block_number_vertical_merge.reference | 41 +++++++++++++++++++
 ...668_column_block_number_vertical_merge.sql | 36 ++++++++++++++++
 ...mn_block_number_with_projections.reference | 19 +++++++++
 ...8_column_block_number_with_projections.sql | 18 ++++++++
 34 files changed, 411 insertions(+), 29 deletions(-)
 create mode 100644 src/Storages/BlockNumberColumn.cpp
 create mode 100644 src/Storages/BlockNumberColumn.h
 create mode 100644 tests/queries/0_stateless/02668_column_block_number.reference
 create mode 100644 tests/queries/0_stateless/02668_column_block_number.sql
 create mode 100644 tests/queries/0_stateless/02668_column_block_number_vertical_merge.reference
 create mode 100644 tests/queries/0_stateless/02668_column_block_number_vertical_merge.sql
 create mode 100644 tests/queries/0_stateless/02668_column_block_number_with_projections.reference
 create mode 100644 tests/queries/0_stateless/02668_column_block_number_with_projections.sql

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index bb257311680..23ab2699cc1 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -1354,3 +1354,4 @@ In this sample configuration:
 - `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
 - `_partition_value` — Values (a tuple) of a `partition by` expression.
 - `_sample_factor` — Sample factor (from the query).
+- `_block_number` — Block number of the row, it is persisted on merges when `allow_experimental_block_number_column` is set to true.
diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md
index e746719b629..483dcf2e61c 100644
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@@ -854,3 +854,9 @@ Possible values:
 - `Always` or `Never`.
 
 Default value: `Never`
+
+## allow_experimental_block_number_column
+
+Persists virtual column `_block_number` on merges.
+
+Default value: false.
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index b426025413c..3654f307eb9 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -31,6 +31,7 @@
 #include <Storages/StorageInMemoryMetadata.h>
 #include <Storages/WindowView/StorageWindowView.h>
 #include <Storages/StorageReplicatedMergeTree.h>
+#include <Storages/BlockNumberColumn.h>
 
 #include <Interpreters/Context.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
@@ -837,6 +838,13 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
                             "Cannot create table with column '{}' for *MergeTree engines because it "
                             "is reserved for lightweight delete feature",
                             LightweightDeleteDescription::FILTER_COLUMN.name);
+
+        auto search_block_number = all_columns.find(BlockNumberColumn::name);
+        if (search_block_number != all_columns.end())
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
+                            "Cannot create table with column '{}' for *MergeTree engines because it "
+                            "is reserved for storing block number",
+                            BlockNumberColumn::name);
     }
 
     const auto & settings = getContext()->getSettingsRef();
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index e50f8488cac..4b0cbec4f9f 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -7,6 +7,7 @@
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
 #include <Storages/StorageMergeTree.h>
+#include <Storages/BlockNumberColumn.h>
 #include <Processors/Transforms/FilterTransform.h>
 #include <Processors/Transforms/ExpressionTransform.h>
 #include <Processors/Transforms/CreatingSetsTransform.h>
@@ -40,7 +41,6 @@
 #include <Parsers/makeASTForLogicalFunction.h>
 #include <Common/logger_useful.h>
 
-
 namespace DB
 {
 
@@ -56,6 +56,7 @@ namespace ErrorCodes
     extern const int THERE_IS_NO_COLUMN;
 }
 
+
 namespace
 {
 
@@ -416,6 +417,12 @@ static void validateUpdateColumns(
             found = true;
         }
 
+        /// Dont allow to override value of block number virtual column
+        if (!found && column_name == BlockNumberColumn::name)
+        {
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name));
+        }
+
         if (!found)
         {
             for (const auto & col : metadata_snapshot->getColumns().getMaterialized())
@@ -511,7 +518,8 @@ void MutationsInterpreter::prepare(bool dry_run)
 
         for (const auto & [name, _] : command.column_to_update_expression)
         {
-            if (!available_columns_set.contains(name) && name != LightweightDeleteDescription::FILTER_COLUMN.name)
+            if (!available_columns_set.contains(name) && name != LightweightDeleteDescription::FILTER_COLUMN.name
+                && name != BlockNumberColumn::name)
                 throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
                     "Column {} is updated but not requested to read", name);
 
@@ -613,6 +621,8 @@ void MutationsInterpreter::prepare(bool dry_run)
                     type = physical_column->type;
                 else if (column == LightweightDeleteDescription::FILTER_COLUMN.name)
                     type = LightweightDeleteDescription::FILTER_COLUMN.type;
+                else if (column == BlockNumberColumn::name)
+                    type = BlockNumberColumn::type;
                 else
                     throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column);
 
@@ -1087,6 +1097,18 @@ struct VirtualColumns
 
                 virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i});
             }
+            else if (columns_to_read[i] == BlockNumberColumn::name)
+            {
+                if (!part->getColumns().contains(BlockNumberColumn::name))
+                {
+                    ColumnWithTypeAndName block_number_column;
+                    block_number_column.type = BlockNumberColumn::type;
+                    block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block);
+                    block_number_column.name = std::move(columns_to_read[i]);
+
+                    virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i});
+                }
+            }
         }
 
         if (!virtuals.empty())
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 4cac2f0e20c..b7ef5dbdbbc 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -20,6 +20,7 @@
 #include <Columns/ColumnArray.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Storages/StorageInMemoryMetadata.h>
+#include <Storages/BlockNumberColumn.h>
 
 
 namespace DB
@@ -260,7 +261,7 @@ void fillMissingColumns(
     const NamesAndTypesList & requested_columns,
     const NamesAndTypesList & available_columns,
     const NameSet & partially_read_columns,
-    StorageMetadataPtr metadata_snapshot)
+    StorageMetadataPtr metadata_snapshot, size_t block_number)
 {
     size_t num_columns = requested_columns.size();
     if (num_columns != res_columns.size())
@@ -339,9 +340,14 @@ void fillMissingColumns(
         }
         else
         {
-            /// We must turn a constant column into a full column because the interpreter could infer
-            /// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
-            res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
+            if (requested_column->name == BlockNumberColumn::name)
+                res_columns[i] = type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
+            else
+                /// We must turn a constant column into a full column because the interpreter could infer
+                /// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
+                res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
+
+
         }
     }
 }
diff --git a/src/Interpreters/inplaceBlockConversions.h b/src/Interpreters/inplaceBlockConversions.h
index bea44bf6db9..7a13a75ec8b 100644
--- a/src/Interpreters/inplaceBlockConversions.h
+++ b/src/Interpreters/inplaceBlockConversions.h
@@ -46,6 +46,6 @@ void fillMissingColumns(
     const NamesAndTypesList & requested_columns,
     const NamesAndTypesList & available_columns,
     const NameSet & partially_read_columns,
-    StorageMetadataPtr metadata_snapshot);
+    StorageMetadataPtr metadata_snapshot, size_t block_number = 0);
 
 }
diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
index 7dac5715f95..f468e112d8f 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
@@ -12,6 +12,7 @@
 #include <DataTypes/NestedUtils.h>
 #include <DataTypes/DataTypeLowCardinality.h>
 #include <IO/WriteHelpers.h>
+#include <Storages/BlockNumberColumn.h>
 
 
 namespace DB
@@ -222,6 +223,12 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns(
         const ColumnWithTypeAndName & column = header.safeGetByPosition(i);
 
         const auto * simple = dynamic_cast<const DataTypeCustomSimpleAggregateFunction *>(column.type->getCustomName());
+        if (column.name == BlockNumberColumn::name)
+        {
+            def.column_numbers_not_to_aggregate.push_back(i);
+            continue;
+        }
+
         /// Discover nested Maps and find columns for summation
         if (typeid_cast<const DataTypeArray *>(column.type.get()) && !simple)
         {
diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp
index 3250d012d5c..7cde86098c7 100644
--- a/src/Processors/Transforms/TTLTransform.cpp
+++ b/src/Processors/Transforms/TTLTransform.cpp
@@ -49,7 +49,8 @@ TTLTransform::TTLTransform(
 
     for (const auto & group_by_ttl : metadata_snapshot_->getGroupByTTLs())
         algorithms.emplace_back(std::make_unique<TTLAggregationAlgorithm>(
-            group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_, getInputPort().getHeader(), storage_));
+                group_by_ttl, old_ttl_infos.group_by_ttl[group_by_ttl.result_column], current_time_, force_,
+                getInputPort().getHeader(), storage_));
 
     if (metadata_snapshot_->hasAnyColumnTTL())
     {
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index f38fc1f3734..da46cb4d7fe 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -27,6 +27,7 @@
 #include <Storages/AlterCommands.h>
 #include <Storages/IStorage.h>
 #include <Storages/LightweightDeleteDescription.h>
+#include <Storages/BlockNumberColumn.h>
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Common/typeid_cast.h>
 #include <Common/randomSeed.h>
@@ -782,7 +783,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada
     /// Drop alias is metadata alter, in other case mutation is required.
     if (type == DROP_COLUMN)
         return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) ||
-            column_name == LightweightDeleteDescription::FILTER_COLUMN.name;
+            column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name;
 
     if (type != MODIFY_COLUMN || data_type == nullptr)
         return false;
@@ -1066,6 +1067,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
                                 "this column name is reserved for lightweight delete feature", backQuote(column_name));
 
+            if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
+                                                            "this column name is reserved for _block_number persisting feature", backQuote(column_name));
+
             if (command.codec)
                 CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec);
 
@@ -1270,6 +1275,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
                                 "this column name is reserved for lightweight delete feature", backQuote(command.rename_to));
 
+            if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
+                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
+                                                            "this column name is reserved for _block_number persisting feature", backQuote(command.rename_to));
+
             if (modified_columns.contains(column_name))
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} "
                                                              "in a single ALTER query", backQuote(column_name));
diff --git a/src/Storages/BlockNumberColumn.cpp b/src/Storages/BlockNumberColumn.cpp
new file mode 100644
index 00000000000..8c9e1fd902a
--- /dev/null
+++ b/src/Storages/BlockNumberColumn.cpp
@@ -0,0 +1,23 @@
+#include <Storages/BlockNumberColumn.h>
+#include <Compression/CompressionCodecMultiple.h>
+
+namespace DB
+{
+
+CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
+
+CompressionCodecPtr getCompressionCodecForBlockNumberColumn()
+{
+    std::vector <CompressionCodecPtr> codecs;
+    codecs.reserve(2);
+    auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory();
+    codecs.emplace_back(getCompressionCodecDelta(data_bytes_size));
+    codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {}));
+    return std::make_shared<CompressionCodecMultiple>(codecs);
+}
+
+const String BlockNumberColumn::name = "_block_number";
+const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
+const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn();
+
+}
diff --git a/src/Storages/BlockNumberColumn.h b/src/Storages/BlockNumberColumn.h
new file mode 100644
index 00000000000..fffa68bfd49
--- /dev/null
+++ b/src/Storages/BlockNumberColumn.h
@@ -0,0 +1,16 @@
+#pragma once
+#include <Core/NamesAndTypes.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Compression/CompressionFactory.h>
+
+namespace DB
+{
+
+struct BlockNumberColumn
+{
+    static const String name;
+    static const DataTypePtr type;
+    static const CompressionCodecPtr compression_codec;
+};
+
+}
diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 0c918bda5fd..9fa79387d5c 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -30,11 +30,15 @@
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/FunctionNameNormalizer.h>
+#include <Storages/BlockNumberColumn.h>
 
 
 namespace DB
 {
 
+CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
+
+
 namespace ErrorCodes
 {
     extern const int NO_SUCH_COLUMN_IN_TABLE;
@@ -721,11 +725,13 @@ CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_
 
 CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const
 {
+    assert (column_name != BlockNumberColumn::name);
     return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
 }
 
 ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
 {
+    assert (column_name != BlockNumberColumn::name);
     const auto it = columns.get<1>().find(column_name);
 
     if (it == columns.get<1>().end() || !it->codec)
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 97c9b81ce87..02c838458f9 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -477,10 +477,6 @@ public:
     /// Moar hardening: this method is supposed to be used for debug assertions
     bool assertHasValidVersionMetadata() const;
 
-    /// Return hardlink count for part.
-    /// Required for keep data on remote FS when part has shadow copies.
-    UInt32 getNumberOfRefereneces() const;
-
     /// True if the part supports lightweight delete mutate.
     bool supportLightweightDeleteMutate() const;
 
diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index 4bb8c400691..120edd81e30 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -62,7 +62,7 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints()
     return avg_value_size_hints;
 }
 
-void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const
+void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const
 {
     try
     {
@@ -71,7 +71,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e
             res_columns, num_rows,
             Nested::convertToSubcolumns(requested_columns),
             Nested::convertToSubcolumns(available_columns),
-            partially_read_columns, storage_snapshot->metadata);
+            partially_read_columns, storage_snapshot->metadata, block_number);
 
         should_evaluate_missing_defaults = std::any_of(
             res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; });
diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h
index a7e60254217..02faebf4b41 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.h
+++ b/src/Storages/MergeTree/IMergeTreeReader.h
@@ -45,7 +45,7 @@ public:
     /// Add columns from ordered_names that are not present in the block.
     /// Missing columns are added in the order specified by ordered_names.
     /// num_rows is needed in case if all res_columns are nullptr.
-    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const;
+    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const;
     /// Evaluate defaulted columns if necessary.
     void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const;
 
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index df607d36402..adb1ca72e46 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -218,6 +218,14 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
     ctx->need_remove_expired_values = false;
     ctx->force_ttl = false;
 
+    if (supportsBlockNumberColumn(global_ctx) && !global_ctx->storage_columns.contains(BlockNumberColumn::name))
+    {
+        global_ctx->storage_columns.emplace_back(NameAndTypePair{BlockNumberColumn::name,BlockNumberColumn::type});
+        global_ctx->all_column_names.emplace_back(BlockNumberColumn::name);
+        global_ctx->gathering_columns.emplace_back(NameAndTypePair{BlockNumberColumn::name,BlockNumberColumn::type});
+        global_ctx->gathering_column_names.emplace_back(BlockNumberColumn::name);
+    }
+
     SerializationInfo::Settings info_settings =
     {
         .ratio_of_defaults_for_sparse = global_ctx->data->getSettings()->ratio_of_defaults_for_sparse_serialization,
@@ -251,12 +259,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
         }
     }
 
-    global_ctx->new_data_part->setColumns(global_ctx->storage_columns, infos, global_ctx->metadata_snapshot->getMetadataVersion());
-
     const auto & local_part_min_ttl = global_ctx->new_data_part->ttl_infos.part_min_ttl;
     if (local_part_min_ttl && local_part_min_ttl <= global_ctx->time_of_merge)
         ctx->need_remove_expired_values = true;
 
+    global_ctx->new_data_part->setColumns(global_ctx->storage_columns, infos, global_ctx->metadata_snapshot->getMetadataVersion());
+
     if (ctx->need_remove_expired_values && global_ctx->ttl_merges_blocker->isCancelled())
     {
         LOG_INFO(ctx->log, "Part {} has values with expired TTL, but merges with TTL are cancelled.", global_ctx->new_data_part->name);
@@ -998,6 +1006,17 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
 
     if (global_ctx->deduplicate)
     {
+        /// We don't want to deduplicate by block number column
+        /// so if deduplicate_by_columns is empty, add all columns except _block_number
+        if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty())
+        {
+            for (const auto & col : global_ctx->merging_column_names)
+            {
+                if (col != BlockNumberColumn::name)
+                    global_ctx->deduplicate_by_columns.emplace_back(col);
+            }
+        }
+
         if (DistinctSortedTransform::isApplicable(header, sort_description, global_ctx->deduplicate_by_columns))
             res_pipe.addTransform(std::make_shared<DistinctSortedTransform>(
                 res_pipe.getHeader(), sort_description, SizeLimits(), 0 /*limit_hint*/, global_ctx->deduplicate_by_columns));
diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h
index 402d3c26e49..2122484bb24 100644
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@@ -13,6 +13,7 @@
 #include <QueryPipeline/QueryPipeline.h>
 #include <Compression/CompressedReadBufferFromFile.h>
 #include <Common/filesystemHelpers.h>
+#include <Storages/BlockNumberColumn.h>
 
 #include <memory>
 #include <list>
@@ -388,6 +389,12 @@ private:
 
     Stages::iterator stages_iterator = stages.begin();
 
+    /// Check for persisting block number column
+    static bool supportsBlockNumberColumn(GlobalRuntimeContextPtr global_ctx)
+    {
+        return global_ctx->data->getSettings()->allow_experimental_block_number_column && global_ctx->metadata_snapshot->getGroupByTTLs().empty();
+    }
+
 };
 
 /// FIXME
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 4b6d2ea41ed..14c9961f6c3 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -78,6 +78,7 @@
 #include <Storages/VirtualColumnUtils.h>
 #include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
 #include <Storages/MutationCommands.h>
+#include <Storages/BlockNumberColumn.h>
 
 #include <boost/range/algorithm_ext/erase.hpp>
 #include <boost/algorithm/string/join.hpp>
@@ -3730,7 +3731,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
     const auto & part_columns = part->getColumns();
     for (const auto & part_column : part_columns)
     {
-        if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name)
+        if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name)
             continue;
 
         auto storage_column = columns.getPhysical(part_column.name);
@@ -8269,6 +8270,7 @@ NamesAndTypesList MergeTreeData::getVirtuals() const
         NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
         NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
         LightweightDeleteDescription::FILTER_COLUMN,
+        NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type),
     };
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
index c6d059498ff..209a5061f33 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
@@ -5,6 +5,7 @@
 #include <Interpreters/Context.h>
 #include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
 #include <Compression/CompressedReadBufferFromFile.h>
+#include <Storages/BlockNumberColumn.h>
 
 
 namespace DB
@@ -64,6 +65,12 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
     ordered_columns_list.sort([this](const auto & lhs, const auto & rhs)
         { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); });
 
+    /// _block_number column is not added by user, but is persisted in a part after merge
+    /// If _block_number is not present in the parts to be merged, then it won't have a position
+    /// So check if its not present and add it at the end
+    if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name))
+        ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type});
+
     return std::make_unique<MergeTreeDataPartWriterCompact>(
         shared_from_this(), ordered_columns_list, metadata_snapshot,
         indices_to_recalc, getMarksFileExtension(),
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
index 3475130bf24..d2a9632d4e5 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
@@ -1,9 +1,12 @@
 #include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
 #include <Storages/MergeTree/MergeTreeDataPartCompact.h>
+#include <Storages/BlockNumberColumn.h>
 
 namespace DB
 {
 
+    CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
+
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
@@ -53,7 +56,14 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
 
     const auto & storage_columns = metadata_snapshot->getColumns();
     for (const auto & column : columns_list)
-        addStreams(column, storage_columns.getCodecDescOrDefault(column.name, default_codec));
+    {
+        ASTPtr compression;
+        if (column.name == BlockNumberColumn::name)
+            compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
+        else
+            compression = storage_columns.getCodecDescOrDefault(column.name, default_codec);
+        addStreams(column, compression);
+    }
 }
 
 void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc)
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index bcf340e0f55..f3e60e224aa 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -6,9 +6,12 @@
 #include <Common/escapeForFileName.h>
 #include <Columns/ColumnSparse.h>
 #include <Common/logger_useful.h>
+#include <Storages/BlockNumberColumn.h>
 
 namespace DB
 {
+    CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
+
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
@@ -87,7 +90,14 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
 {
     const auto & columns = metadata_snapshot->getColumns();
     for (const auto & it : columns_list)
-        addStreams(it, columns.getCodecDescOrDefault(it.name, default_codec));
+    {
+        ASTPtr compression;
+        if (it.name == BlockNumberColumn::name)
+            compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
+        else
+            compression = columns.getCodecDescOrDefault(it.name, default_codec);
+        addStreams(it, compression);
+    }
 }
 
 void MergeTreeDataPartWriterWide::addStreams(
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 3c2b09b6f3b..d8ebfb95d74 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -46,7 +46,7 @@
 #include <Functions/IFunction.h>
 
 #include <IO/WriteBufferFromOStream.h>
-
+#include <Storages/BlockNumberColumn.h>
 #include <Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h>
 
 namespace CurrentMetrics
@@ -1232,6 +1232,10 @@ static void selectColumnNames(
         {
             virt_column_names.push_back(name);
         }
+        else if (name == BlockNumberColumn::name)
+        {
+            virt_column_names.push_back(name);
+        }
         else if (name == "_part_uuid")
         {
             virt_column_names.push_back(name);
diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
index 95fcde23f8e..9b480ac27a0 100644
--- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
@@ -9,6 +9,7 @@
 #include <DataTypes/DataTypeUUID.h>
 #include <DataTypes/DataTypeArray.h>
 #include <Processors/Transforms/AggregatingTransform.h>
+#include <Storages/BlockNumberColumn.h>
 #include <city.h>
 
 namespace DB
@@ -24,7 +25,8 @@ namespace ErrorCodes
 static void injectNonConstVirtualColumns(
     size_t rows,
     Block & block,
-    const Names & virtual_columns);
+    const Names & virtual_columns,
+    MergeTreeReadTask * task = nullptr);
 
 static void injectPartConstVirtualColumns(
     size_t rows,
@@ -247,7 +249,8 @@ namespace
 static void injectNonConstVirtualColumns(
     size_t rows,
     Block & block,
-    const Names & virtual_columns)
+    const Names & virtual_columns,
+    MergeTreeReadTask * task)
 {
     VirtualColumnsInserter inserter(block);
     for (const auto & virtual_column_name : virtual_columns)
@@ -278,6 +281,24 @@ static void injectNonConstVirtualColumns(
 
                 inserter.insertUInt8Column(column, virtual_column_name);
         }
+
+        if (virtual_column_name == BlockNumberColumn::name)
+        {
+            ColumnPtr column;
+            if (rows)
+            {
+                size_t value = 0;
+                if (task)
+                {
+                    value = task->getInfo().data_part ? task->getInfo().data_part->info.min_block : 0;
+                }
+                column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst();
+            }
+            else
+                column = BlockNumberColumn::type->createColumn();
+
+            inserter.insertUInt64Column(column, virtual_column_name);
+        }
     }
 }
 
@@ -368,7 +389,7 @@ void MergeTreeSelectProcessor::injectVirtualColumns(
 {
     /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves.
     /// Note that the order is important: virtual columns filled by the range reader must go first
-    injectNonConstVirtualColumns(row_count, block, virtual_columns);
+    injectNonConstVirtualColumns(row_count, block, virtual_columns,task);
     injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns);
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 8a9faa5cee4..6ba0d4dcfad 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -176,7 +176,7 @@ try
             current_mark += (rows_to_read == rows_read);
 
             bool should_evaluate_missing_defaults = false;
-            reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read);
+            reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block);
 
             if (should_evaluate_missing_defaults)
             {
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 4f36da048c2..52c3f3efc6d 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -171,7 +171,8 @@ struct Settings;
     M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \
     M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \
     M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \
-    M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \
+    M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0)                                                                                                                                       \
+    M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \
     \
     /** Compress marks and primary key. */ \
     M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index f869a1158ef..4baa43311d4 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -102,6 +102,8 @@
 #include <IO/Operators.h>
 #include <IO/ConnectionTimeouts.h>
 
+#include <Storages/BlockNumberColumn.h>
+
 #include <memory>
 #include <filesystem>
 #include <optional>
@@ -298,6 +300,7 @@ NamesAndTypesList StorageDistributed::getVirtuals() const
         NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
         NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
         NameAndTypePair("_row_exists", std::make_shared<DataTypeUInt8>()),
+        NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type),
         NameAndTypePair("_shard_num", std::make_shared<DataTypeUInt32>()), /// deprecated
     };
 }
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp
index ab795a2d04c..cf76f7a16ba 100644
--- a/src/Storages/StorageLog.cpp
+++ b/src/Storages/StorageLog.cpp
@@ -33,6 +33,7 @@
 #include <Backups/IBackup.h>
 #include <Backups/RestorerFromBackup.h>
 #include <Disks/TemporaryFileOnDisk.h>
+#include <Storages/BlockNumberColumn.h>
 
 #include <cassert>
 #include <chrono>
@@ -45,6 +46,8 @@
 namespace DB
 {
 
+    CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
+
 namespace ErrorCodes
 {
     extern const int TIMEOUT_EXCEEDED;
@@ -452,10 +455,15 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c
             const auto & data_file = *data_file_it->second;
             const auto & columns = metadata_snapshot->getColumns();
 
+            CompressionCodecPtr compression;
+            if (name_and_type.name == BlockNumberColumn::name)
+                compression = BlockNumberColumn::compression_codec;
+            else
+                compression = columns.getCodecOrDefault(name_and_type.name);
+
             it = streams.try_emplace(data_file.name, storage.disk, data_file.path,
                                      storage.file_checker.getFileSize(data_file.path),
-                                     columns.getCodecOrDefault(name_and_type.name),
-                                     storage.max_compress_block_size).first;
+                                     compression, storage.max_compress_block_size).first;
         }
 
         auto & stream = it->second;
diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp
index c0e85900794..0c19634f50c 100644
--- a/src/Storages/StorageSnapshot.cpp
+++ b/src/Storages/StorageSnapshot.cpp
@@ -1,5 +1,6 @@
 #include <Storages/StorageSnapshot.h>
 #include <Storages/LightweightDeleteDescription.h>
+#include <Storages/BlockNumberColumn.h>
 #include <Storages/IStorage.h>
 #include <DataTypes/ObjectUtils.h>
 #include <DataTypes/NestedUtils.h>
@@ -24,6 +25,7 @@ void StorageSnapshot::init()
 
     if (storage.hasLightweightDeletedMask())
         system_columns[LightweightDeleteDescription::FILTER_COLUMN.name] = LightweightDeleteDescription::FILTER_COLUMN.type;
+    system_columns[BlockNumberColumn::name] = BlockNumberColumn::type;
 }
 
 NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const
diff --git a/tests/queries/0_stateless/02668_column_block_number.reference b/tests/queries/0_stateless/02668_column_block_number.reference
new file mode 100644
index 00000000000..f08220243ff
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number.reference
@@ -0,0 +1,41 @@
+*** BEFORE MUTATION BEFORE MERGE ***
+1	1	1	all_1_1_0
+2	2	1	all_1_1_0
+3	3	1	all_1_1_0
+4	4	2	all_2_2_0
+5	5	2	all_2_2_0
+6	6	2	all_2_2_0
+*** AFTER MUTATION BEFORE MERGE ***
+1	0	1	all_1_1_0_3
+2	0	1	all_1_1_0_3
+3	0	1	all_1_1_0_3
+4	4	2	all_2_2_0_3
+5	5	2	all_2_2_0_3
+6	6	2	all_2_2_0_3
+*** AFTER MUTATION AFTER MERGE ***
+1	0	1	all_1_2_1_3
+2	0	1	all_1_2_1_3
+3	0	1	all_1_2_1_3
+4	4	2	all_1_2_1_3
+5	5	2	all_1_2_1_3
+6	6	2	all_1_2_1_3
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***
+1	0	1	all_1_2_1_3
+2	0	1	all_1_2_1_3
+3	0	1	all_1_2_1_3
+4	4	2	all_1_2_1_3
+5	5	2	all_1_2_1_3
+6	6	2	all_1_2_1_3
+7	7	4	all_4_4_0
+8	8	4	all_4_4_0
+9	9	4	all_4_4_0
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***
+1	0	1	all_1_4_2_3
+2	0	1	all_1_4_2_3
+3	0	1	all_1_4_2_3
+4	4	2	all_1_4_2_3
+5	5	2	all_1_4_2_3
+6	6	2	all_1_4_2_3
+7	7	4	all_1_4_2_3
+8	8	4	all_1_4_2_3
+9	9	4	all_1_4_2_3
diff --git a/tests/queries/0_stateless/02668_column_block_number.sql b/tests/queries/0_stateless/02668_column_block_number.sql
new file mode 100644
index 00000000000..4d4d54bba5f
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number.sql
@@ -0,0 +1,32 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (id UInt32, a UInt32) ENGINE = MergeTree ORDER BY id SETTINGS allow_experimental_block_number_column = true;
+
+INSERT INTO test(id,a) VALUES (1,1),(2,2),(3,3);
+INSERT INTO test(id,a) VALUES (4,4),(5,5),(6,6);
+
+SELECT '*** BEFORE MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_part from test ORDER BY id;
+
+set mutations_sync=1;
+ALTER TABLE test UPDATE a=0 WHERE id<4;
+
+SELECT '*** AFTER MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+INSERT INTO test(id,a) VALUES (7,7),(8,8),(9,9);
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+DROP TABLE test;
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02668_column_block_number_vertical_merge.reference b/tests/queries/0_stateless/02668_column_block_number_vertical_merge.reference
new file mode 100644
index 00000000000..f08220243ff
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number_vertical_merge.reference
@@ -0,0 +1,41 @@
+*** BEFORE MUTATION BEFORE MERGE ***
+1	1	1	all_1_1_0
+2	2	1	all_1_1_0
+3	3	1	all_1_1_0
+4	4	2	all_2_2_0
+5	5	2	all_2_2_0
+6	6	2	all_2_2_0
+*** AFTER MUTATION BEFORE MERGE ***
+1	0	1	all_1_1_0_3
+2	0	1	all_1_1_0_3
+3	0	1	all_1_1_0_3
+4	4	2	all_2_2_0_3
+5	5	2	all_2_2_0_3
+6	6	2	all_2_2_0_3
+*** AFTER MUTATION AFTER MERGE ***
+1	0	1	all_1_2_1_3
+2	0	1	all_1_2_1_3
+3	0	1	all_1_2_1_3
+4	4	2	all_1_2_1_3
+5	5	2	all_1_2_1_3
+6	6	2	all_1_2_1_3
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***
+1	0	1	all_1_2_1_3
+2	0	1	all_1_2_1_3
+3	0	1	all_1_2_1_3
+4	4	2	all_1_2_1_3
+5	5	2	all_1_2_1_3
+6	6	2	all_1_2_1_3
+7	7	4	all_4_4_0
+8	8	4	all_4_4_0
+9	9	4	all_4_4_0
+*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***
+1	0	1	all_1_4_2_3
+2	0	1	all_1_4_2_3
+3	0	1	all_1_4_2_3
+4	4	2	all_1_4_2_3
+5	5	2	all_1_4_2_3
+6	6	2	all_1_4_2_3
+7	7	4	all_1_4_2_3
+8	8	4	all_1_4_2_3
+9	9	4	all_1_4_2_3
diff --git a/tests/queries/0_stateless/02668_column_block_number_vertical_merge.sql b/tests/queries/0_stateless/02668_column_block_number_vertical_merge.sql
new file mode 100644
index 00000000000..a32060efae1
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number_vertical_merge.sql
@@ -0,0 +1,36 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (id UInt32, a UInt32) ENGINE = MergeTree ORDER BY id SETTINGS allow_experimental_block_number_column = true,
+    vertical_merge_algorithm_min_rows_to_activate = 1,
+    vertical_merge_algorithm_min_columns_to_activate = 0,
+    min_rows_for_wide_part = 1,
+    min_bytes_for_wide_part = 1;
+
+INSERT INTO test(id,a) VALUES (1,1),(2,2),(3,3);
+INSERT INTO test(id,a) VALUES (4,4),(5,5),(6,6);
+
+SELECT '*** BEFORE MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_part from test ORDER BY id;
+
+set mutations_sync=1;
+ALTER TABLE test UPDATE a=0 WHERE id<4;
+
+SELECT '*** AFTER MUTATION BEFORE MERGE ***';
+SELECT id,a,_block_number,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+INSERT INTO test(id,a) VALUES (7,7),(8,8),(9,9);
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+OPTIMIZE TABLE test FINAL;
+
+SELECT '*** AFTER MUTATION AFTER MERGE , NEW BLOCK MERGED ***';
+SELECT *,_block_number,_part from test ORDER BY id;
+
+DROP TABLE test;
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02668_column_block_number_with_projections.reference b/tests/queries/0_stateless/02668_column_block_number_with_projections.reference
new file mode 100644
index 00000000000..289a21035b5
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number_with_projections.reference
@@ -0,0 +1,19 @@
+0
+1
+2
+3
+*** AFTER FIRST OPTIMIZE ***
+0	1
+1	2
+1	2
+2	3
+3	3
+*** AFTER SECOND OPTIMIZE ***
+0	1
+1	2
+1	2
+2	3
+3	3
+4	4
+5	4
+6	4
diff --git a/tests/queries/0_stateless/02668_column_block_number_with_projections.sql b/tests/queries/0_stateless/02668_column_block_number_with_projections.sql
new file mode 100644
index 00000000000..25ec411967b
--- /dev/null
+++ b/tests/queries/0_stateless/02668_column_block_number_with_projections.sql
@@ -0,0 +1,18 @@
+DROP TABLE IF EXISTS t;
+CREATE TABLE t (x UInt8, PROJECTION p (SELECT x GROUP BY x)) ENGINE = MergeTree ORDER BY () SETTINGS allow_experimental_block_number_column=true;
+INSERT INTO t VALUES (0);
+INSERT INTO t VALUES (1),(1);
+INSERT INTO t VALUES (2),(3);
+
+SELECT x FROM t GROUP BY x;
+OPTIMIZE TABLE t FINAL;
+
+SELECT '*** AFTER FIRST OPTIMIZE ***';
+SELECT x,_block_number FROM t;
+
+INSERT INTO t VALUES (4), (5), (6);
+OPTIMIZE TABLE t FINAL;
+SELECT '*** AFTER SECOND OPTIMIZE ***';
+SELECT x,_block_number FROM t;
+
+DROP TABLE t;
\ No newline at end of file

From 1200e03f207e17d175fb53c10846bf80ac9c707d Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Wed, 20 Sep 2023 18:51:31 +0800
Subject: [PATCH 158/243] correctly implement swap

---
 src/Common/isLocalAddress.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index e9fefb0c319..ce0316db2c4 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -38,8 +38,9 @@ struct NetworkInterfaces : public boost::noncopyable
 
     void swap(NetworkInterfaces && other)
     {
+        auto * tmp = ifaddr;
         ifaddr = other.ifaddr;
-        other.ifaddr = nullptr;
+        other.ifaddr = tmp;
     }
 
     bool hasAddress(const Poco::Net::IPAddress & address) const

From 9c3fb641068ddcd538b925e16361e73e7f32a81e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Wed, 20 Sep 2023 13:32:07 +0200
Subject: [PATCH 159/243] Check type of the filter expressions while filtering
 by virtual columns

This should fix filtering by virtual columns with non-uint8 types, i.e.
queries like:

    SELECT * FROM data WHERE 1.0

Fixes: 02346_full_text_search
Fixes: 00990_hasToken_and_tokenbf
v2: move check out from is_const to filterBlockWithQuery(), since in is_const there is no way to validate sets.
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 src/Storages/VirtualColumnUtils.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index dbb424ee957..9ded5256a72 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -1,5 +1,6 @@
 #include <memory>
 #include <Core/NamesAndTypes.h>
+#include <Core/TypeId.h>
 
 #include <Interpreters/Context.h>
 #include <Interpreters/TreeRewriter.h>
@@ -288,7 +289,9 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, ContextPtr contex
 
     /// Filter the block.
     String filter_column_name = expression_ast->getColumnName();
-    ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullColumnIfConst();
+    ColumnPtr filter_column = block_with_filter.getByName(filter_column_name).column->convertToFullIfNeeded();
+    if (filter_column->getDataType() != TypeIndex::UInt8)
+        return;
 
     ConstantFilterDescription constant_filter(*filter_column);
 

From 2805ebf2b259bf99382ebdc537139ff8e6a3973a Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Wed, 20 Sep 2023 11:08:02 +0000
Subject: [PATCH 160/243] Set correct size for signal pipe buffer

---
 src/Daemon/BaseDaemon.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index be323dc6786..8e01311dcb0 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -104,7 +104,8 @@ static const size_t signal_pipe_buf_size =
     + sizeof(ucontext_t*)
     + sizeof(StackTrace)
     + sizeof(UInt32)
-    + sizeof(void*);
+    + sizeof(void*)
+    + sizeof(UInt64);
 
 using signal_function = void(int, siginfo_t*, void*);
 

From 06513f60ebf548f69f47c4dde649ff30bea6b971 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@gmail.com>
Date: Wed, 20 Sep 2023 15:16:39 +0200
Subject: [PATCH 161/243] Revert "refine error code of duplicated index in
 create query"

---
 src/Interpreters/InterpreterCreateQuery.cpp            |  4 ----
 .../0_stateless/02884_duplicate_index_name.reference   |  0
 .../queries/0_stateless/02884_duplicate_index_name.sql | 10 ----------
 3 files changed, 14 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02884_duplicate_index_name.reference
 delete mode 100644 tests/queries/0_stateless/02884_duplicate_index_name.sql

diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 3654f307eb9..a0635f18214 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -96,7 +96,6 @@ namespace ErrorCodes
     extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY;
     extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE;
     extern const int ILLEGAL_COLUMN;
-    extern const int ILLEGAL_INDEX;
     extern const int LOGICAL_ERROR;
     extern const int UNKNOWN_DATABASE;
     extern const int PATH_ACCESS_DENIED;
@@ -699,8 +698,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
             for (const auto & index : create.columns_list->indices->children)
             {
                 IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext());
-                if (properties.indices.has(index_desc.name))
-                    throw Exception(ErrorCodes::ILLEGAL_INDEX, "Duplicated index name {}", backQuoteIfNeed(index_desc.name));
                 const auto & settings = getContext()->getSettingsRef();
                 if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index)
                 {
@@ -715,7 +712,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti
 
                 properties.indices.push_back(index_desc);
             }
-
         if (create.columns_list->projections)
             for (const auto & projection_ast : create.columns_list->projections->children)
             {
diff --git a/tests/queries/0_stateless/02884_duplicate_index_name.reference b/tests/queries/0_stateless/02884_duplicate_index_name.reference
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/tests/queries/0_stateless/02884_duplicate_index_name.sql b/tests/queries/0_stateless/02884_duplicate_index_name.sql
deleted file mode 100644
index 4cd9ae6d2a2..00000000000
--- a/tests/queries/0_stateless/02884_duplicate_index_name.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-DROP TABLE IF EXISTS test_dup_index;
-
-CREATE TABLE test_dup_index
-(
-	a Int64,
-	b Int64,
-	INDEX idx_a a TYPE minmax,
-	INDEX idx_a b TYPE minmax
-) Engine = MergeTree()
-ORDER BY a; -- { serverError ILLEGAL_INDEX }

From c75f7c843456fe184f1d7d5e40e77d27123a441b Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 20 Sep 2023 13:27:11 +0000
Subject: [PATCH 162/243] Correct merge result

---
 src/Common/ErrorCodes.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 95614c7e9ce..be2b0a7bd5e 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -585,8 +585,9 @@
     M(700, USER_SESSION_LIMIT_EXCEEDED)  \
     M(701, CLUSTER_DOESNT_EXIST) \
     M(702, CLIENT_INFO_DOES_NOT_MATCH) \
-    M(703, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
-    M(704, INVALID_IDENTIFIER) \
+    M(703, INVALID_IDENTIFIER) \
+    M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
+    \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \

From 1a9467535066f54dc0dcf0f8c0e75dcd6fb9509b Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Wed, 20 Sep 2023 13:32:01 +0000
Subject: [PATCH 163/243] Don't capture this in callback

---
 src/Server/KeeperTCPHandler.cpp | 2 +-
 src/Server/KeeperTCPHandler.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp
index 58d227a5ae5..84ed7388503 100644
--- a/src/Server/KeeperTCPHandler.cpp
+++ b/src/Server/KeeperTCPHandler.cpp
@@ -382,7 +382,7 @@ void KeeperTCPHandler::runImpl()
     }
 
     auto response_fd = poll_wrapper->getResponseFD();
-    auto response_callback = [this, response_fd] (const Coordination::ZooKeeperResponsePtr & response)
+    auto response_callback = [responses = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response)
     {
         if (!responses->push(response))
             throw Exception(ErrorCodes::SYSTEM_ERROR,
diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h
index ffdd50b805a..588cdf6305e 100644
--- a/src/Server/KeeperTCPHandler.h
+++ b/src/Server/KeeperTCPHandler.h
@@ -25,7 +25,7 @@ struct SocketInterruptablePollWrapper;
 using SocketInterruptablePollWrapperPtr = std::unique_ptr<SocketInterruptablePollWrapper>;
 
 using ThreadSafeResponseQueue = ConcurrentBoundedQueue<Coordination::ZooKeeperResponsePtr>;
-using ThreadSafeResponseQueuePtr = std::unique_ptr<ThreadSafeResponseQueue>;
+using ThreadSafeResponseQueuePtr = std::shared_ptr<ThreadSafeResponseQueue>;
 
 struct LastOp;
 using LastOpMultiVersion = MultiVersion<LastOp>;

From 494227de7126044db7bbbd0829ab8f7f92a4e3fd Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Wed, 20 Sep 2023 15:36:11 +0200
Subject: [PATCH 164/243] Fix tests

---
 .../01161_information_schema.reference        |  62 +++++-----
 .../0_stateless/01161_information_schema.sql  | 113 ++----------------
 ...information_schema_show_database.reference |  10 +-
 3 files changed, 44 insertions(+), 141 deletions(-)

diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference
index 3cfec968d0c..8139d327e31 100644
--- a/tests/queries/0_stateless/01161_information_schema.reference
+++ b/tests/queries/0_stateless/01161_information_schema.reference
@@ -22,36 +22,36 @@ referential_constraints
 schemata
 tables
 views
-INFORMATION_SCHEMA	INFORMATION_SCHEMA	default	\N	\N	\N	\N
-information_schema	information_schema	default	\N	\N	\N	\N
-default	default	kcu	BASE TABLE	utf8mb4_0900_ai_ci	
-default	default	kcu2	BASE TABLE	utf8mb4_0900_ai_ci	
-default	default	mv	VIEW	utf8mb4_0900_ai_ci	
-default	default	t	FOREIGN TABLE	utf8mb4_0900_ai_ci	
-default	default	v	VIEW	utf8mb4_0900_ai_ci	
-		tmp	LOCAL TEMPORARY	utf8mb4_0900_ai_ci	
-default	default	mv	SELECT * FROM system.one	NONE	NO	YES	NO	NO	NO
-default	default	v	SELECT n, f FROM default.t	NONE	NO	NO	NO	NO	NO
-default	default	kcu	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
-default	default	kcu	s	2		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
-default	default	kcu2	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
-default	default	kcu2	d	2		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
-default	default	kcu2	u	3		0	UUID	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UUID
-default	default	mv	dummy	1		0	UInt8	\N	\N	8	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt8
-default	default	t	n	1		0	UInt64	\N	\N	64	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt64
-default	default	t	f	2		0	Float32	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float32
-default	default	t	s	3		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
-default	default	t	fs	4		0	FixedString(42)	42	42	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		FixedString(42)
-default	default	t	d	5		0	Decimal(9, 6)	\N	\N	9	10	6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Decimal(9, 6)
-default	default	v	n	1		1	Nullable(Int32)	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Nullable(Int32)
-default	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float64
-		tmp	d	1		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
-		tmp	dt	2		0	DateTime	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime
-		tmp	dtms	3		0	DateTime64(3)	\N	\N	\N	\N	\N	3	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime64(3)
+INFORMATION_SCHEMA	INFORMATION_SCHEMA	default	\N	\N	\N	\N	INFORMATION_SCHEMA	INFORMATION_SCHEMA	default	\N	\N	\N	\N
+information_schema	information_schema	default	\N	\N	\N	\N	information_schema	information_schema	default	\N	\N	\N	\N
+default	default	kcu	BASE TABLE	utf8mb4_0900_ai_ci		default	default	kcu	BASE TABLE	utf8mb4_0900_ai_ci	
+default	default	kcu2	BASE TABLE	utf8mb4_0900_ai_ci		default	default	kcu2	BASE TABLE	utf8mb4_0900_ai_ci	
+default	default	mv	VIEW	utf8mb4_0900_ai_ci		default	default	mv	VIEW	utf8mb4_0900_ai_ci	
+default	default	t	FOREIGN TABLE	utf8mb4_0900_ai_ci		default	default	t	FOREIGN TABLE	utf8mb4_0900_ai_ci	
+default	default	v	VIEW	utf8mb4_0900_ai_ci		default	default	v	VIEW	utf8mb4_0900_ai_ci	
+		tmp	LOCAL TEMPORARY	utf8mb4_0900_ai_ci				tmp	LOCAL TEMPORARY	utf8mb4_0900_ai_ci	
+default	default	mv	SELECT * FROM system.one	NONE	NO	YES	NO	NO	NO	default	default	mv	SELECT * FROM system.one	NONE	NO	YES	NO	NO	NO
+default	default	v	SELECT n, f FROM default.t	NONE	NO	NO	NO	NO	NO	default	default	v	SELECT n, f FROM default.t	NONE	NO	NO	NO	NO	NO
+default	default	kcu	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32	default	default	kcu	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
+default	default	kcu	s	2		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String	default	default	kcu	s	2		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
+default	default	kcu2	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32	default	default	kcu2	i	1		0	UInt32	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt32
+default	default	kcu2	d	2		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date	default	default	kcu2	d	2		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
+default	default	kcu2	u	3		0	UUID	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UUID	default	default	kcu2	u	3		0	UUID	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UUID
+default	default	mv	dummy	1		0	UInt8	\N	\N	8	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt8	default	default	mv	dummy	1		0	UInt8	\N	\N	8	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt8
+default	default	t	n	1		0	UInt64	\N	\N	64	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt64	default	default	t	n	1		0	UInt64	\N	\N	64	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		UInt64
+default	default	t	f	2		0	Float32	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float32	default	default	t	f	2		0	Float32	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float32
+default	default	t	s	3		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String	default	default	t	s	3		0	String	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		String
+default	default	t	fs	4		0	FixedString(42)	42	42	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		FixedString(42)	default	default	t	fs	4		0	FixedString(42)	42	42	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		FixedString(42)
+default	default	t	d	5		0	Decimal(9, 6)	\N	\N	9	10	6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Decimal(9, 6)	default	default	t	d	5		0	Decimal(9, 6)	\N	\N	9	10	6	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Decimal(9, 6)
+default	default	v	n	1		1	Nullable(Int32)	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Nullable(Int32)	default	default	v	n	1		1	Nullable(Int32)	\N	\N	32	2	0	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Nullable(Int32)
+default	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float64	default	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		Float64
+		tmp	d	1		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date			tmp	d	1		0	Date	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		Date
+		tmp	dt	2		0	DateTime	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime			tmp	dt	2		0	DateTime	\N	\N	\N	\N	\N	0	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime
+		tmp	dtms	3		0	DateTime64(3)	\N	\N	\N	\N	\N	3	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime64(3)			tmp	dtms	3		0	DateTime64(3)	\N	\N	\N	\N	\N	3	\N	\N	\N	\N	\N	\N	\N	\N	\N		DateTime64(3)
 1
 1
-def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
+def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql
index d2b7af9ed29..2ccdddc7a9e 100644
--- a/tests/queries/0_stateless/01161_information_schema.sql
+++ b/tests/queries/0_stateless/01161_information_schema.sql
@@ -16,71 +16,12 @@ CREATE TABLE kcu (i UInt32, s String) ENGINE MergeTree ORDER BY i;
 CREATE TABLE kcu2 (i UInt32, d Date, u UUID) ENGINE MergeTree ORDER BY (u, d);
 
 -- FIXME #28687
-SELECT catalog_name,
-       schema_name,
-       schema_owner,
-       default_character_set_catalog,
-       default_character_set_schema,
-       default_character_set_name,
-       sql_path
-FROM information_schema.schemata
-WHERE schema_name ilike 'information_schema';
-
+SELECT * FROM information_schema.schemata WHERE schema_name ilike 'information_schema';
 -- SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
-
-SELECT table_catalog, 
-       table_schema, 
-       table_name, 
-       table_type, 
-       table_collation, 
-       table_comment
-FROM INFORMATION_SCHEMA.TABLES
-WHERE (table_schema = currentDatabase() OR table_schema = '')
-  AND table_name NOT LIKE '%inner%';
-
-SELECT table_catalog,
-       table_schema,
-       table_name,
-       view_definition,
-       check_option,
-       is_updatable,
-       is_insertable_into,
-       is_trigger_updatable,
-       is_trigger_deletable,
-       is_trigger_insertable_into
-FROM information_schema.views
-WHERE table_schema = currentDatabase();
-
--- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
-
-SELECT table_catalog,
-       table_schema,
-       table_name,
-       column_name,
-       ordinal_position,
-       column_default,
-       is_nullable,
-       data_type,
-       character_maximum_length,
-       character_octet_length,
-       numeric_precision,
-       numeric_precision_radix,
-       numeric_scale,
-       datetime_precision,
-       character_set_catalog,
-       character_set_schema,
-       character_set_name,
-       collation_catalog,
-       collation_schema,
-       collation_name,
-       domain_catalog,
-       domain_schema,
-       domain_name,
-       column_comment,
-       column_type
-FROM INFORMATION_SCHEMA.COLUMNS
-WHERE (table_schema = currentDatabase() OR table_schema = '')
-  AND table_name NOT LIKE '%inner%';
+SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%';
+SELECT * FROM information_schema.views WHERE table_schema = currentDatabase();
+-- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%'
+SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%';
 
 -- mixed upper/lowercase schema and table name:
 SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't';
@@ -88,48 +29,10 @@ SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase
 SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE }
 SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE }
 
-SELECT constraint_catalog,
-       constraint_schema,
-       constraint_name,
-       table_catalog,
-       table_schema,
-       table_name,
-       column_name,
-       ordinal_position,
-       position_in_unique_constraint,
-       referenced_table_schema,
-       referenced_table_name,
-       referenced_column_name
-FROM information_schema.key_column_usage
-WHERE table_name = 'kcu';
+SELECT * FROM information_schema.key_column_usage WHERE table_name = 'kcu';
+SELECT * FROM information_schema.key_column_usage WHERE table_name = 'kcu2';
 
-SELECT constraint_catalog,
-       constraint_schema,
-       constraint_name,
-       table_catalog,
-       table_schema,
-       table_name,
-       column_name,
-       ordinal_position,
-       position_in_unique_constraint,
-       referenced_table_schema,
-       referenced_table_name,
-       referenced_column_name
-FROM information_schema.key_column_usage
-WHERE table_name = 'kcu2';
-
-SELECT constraint_catalog,
-       constraint_name,
-       constraint_schema,
-       unique_constraint_catalog,
-       unique_constraint_name,
-       unique_constraint_schema,
-       match_option,
-       update_rule,
-       delete_rule,
-       table_name,
-       referenced_table_name
-FROM information_schema.referential_constraints;
+SELECT * FROM information_schema.referential_constraints;
 
 drop view mv;
 drop view v;
diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference
index 0cf7913e28e..b0eb08e00f5 100644
--- a/tests/queries/0_stateless/02206_information_schema_show_database.reference
+++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference
@@ -1,6 +1,6 @@
 CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory
-CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n    `table_catalog` String,\n    `table_schema` String,\n    `table_name` String,\n    `TABLE_SCHEMA` String,\n    `TABLE_NAME` String,\n    `column_name` String,\n    `ordinal_position` UInt64,\n    `column_default` String,\n    `is_nullable` String,\n    `data_type` String,\n    `character_maximum_length` Nullable(UInt64),\n    `character_octet_length` Nullable(UInt64),\n    `numeric_precision` Nullable(UInt64),\n    `numeric_precision_radix` Nullable(UInt64),\n    `numeric_scale` Nullable(UInt64),\n    `datetime_precision` Nullable(UInt64),\n    `character_set_catalog` Nullable(String),\n    `character_set_schema` Nullable(String),\n    `character_set_name` Nullable(String),\n    `collation_catalog` Nullable(String),\n    `collation_schema` Nullable(String),\n    `collation_name` Nullable(String),\n    `domain_catalog` Nullable(String),\n    `domain_schema` Nullable(String),\n    `domain_name` Nullable(String),\n    `column_comment` String,\n    `column_type` String,\n    `TABLE_CATALOG` String ALIAS table_catalog,\n    `COLUMN_NAME` String ALIAS column_name,\n    `ORDINAL_POSITION` UInt64 ALIAS ordinal_position,\n    `COLUMN_DEFAULT` String ALIAS column_default,\n    `IS_NULLABLE` String ALIAS is_nullable,\n    `DATA_TYPE` String ALIAS data_type,\n    `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64) ALIAS character_maximum_length,\n    `CHARACTER_OCTET_LENGTH` Nullable(UInt64) ALIAS character_octet_length,\n    `NUMERIC_PRECISION` Nullable(UInt64) ALIAS numeric_precision,\n    `NUMERIC_PRECISION_RADIX` Nullable(UInt64) ALIAS numeric_precision_radix,\n    `NUMERIC_SCALE` Nullable(UInt64) ALIAS numeric_scale,\n    `DATETIME_PRECISION` Nullable(UInt64) ALIAS datetime_precision,\n    `CHARACTER_SET_CATALOG` Nullable(String) ALIAS character_set_catalog,\n    `CHARACTER_SET_SCHEMA` Nullable(String) ALIAS character_set_schema,\n    `CHARACTER_SET_NAME` Nullable(String) ALIAS character_set_name,\n    `COLLATION_CATALOG` Nullable(String) ALIAS collation_catalog,\n    `COLLATION_SCHEMA` Nullable(String) ALIAS collation_schema,\n    `COLLATION_NAME` Nullable(String) ALIAS collation_name,\n    `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog,\n    `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema,\n    `DOMAIN_NAME` Nullable(String) ALIAS domain_name,\n    `COLUMN_COMMENT` String ALIAS column_comment,\n    `COLUMN_TYPE` String ALIAS column_type\n) AS\nSELECT\n    database AS table_catalog,\n    database AS table_schema,\n    database AS TABLE_SCHEMA,\n    table AS table_name,\n    table AS TABLE_NAME,\n    name AS column_name,\n    position AS ordinal_position,\n    default_expression AS column_default,\n    type LIKE \'Nullable(%)\' AS is_nullable,\n    type AS data_type,\n    character_octet_length AS character_maximum_length,\n    character_octet_length,\n    numeric_precision,\n    numeric_precision_radix,\n    numeric_scale,\n    datetime_precision,\n    NULL AS character_set_catalog,\n    NULL AS character_set_schema,\n    NULL AS character_set_name,\n    NULL AS collation_catalog,\n    NULL AS collation_schema,\n    NULL AS collation_name,\n    NULL AS domain_catalog,\n    NULL AS domain_schema,\n    NULL AS domain_name,\n    comment AS column_comment,\n    type AS column_type\nFROM system.columns
-CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables
-CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables
-CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables
-CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5), `TABLE_CATALOG` String ALIAS table_catalog, `TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, `TABLE_TYPE` Enum8(\'BASE TABLE\' = 1, \'VIEW\' = 2, \'FOREIGN TABLE\' = 3, \'LOCAL TEMPORARY\' = 4, \'SYSTEM VIEW\' = 5) ALIAS table_type) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, 4, engine LIKE \'%View\', 2, engine LIKE \'System%\', 5, has_own_data = 0, 3, 1) AS table_type FROM system.tables
+CREATE VIEW INFORMATION_SCHEMA.COLUMNS\n(\n    `table_catalog` String,\n    `table_schema` String,\n    `table_name` String,\n    `column_name` String,\n    `ordinal_position` UInt64,\n    `column_default` String,\n    `is_nullable` String,\n    `data_type` String,\n    `character_maximum_length` Nullable(UInt64),\n    `character_octet_length` Nullable(UInt64),\n    `numeric_precision` Nullable(UInt64),\n    `numeric_precision_radix` Nullable(UInt64),\n    `numeric_scale` Nullable(UInt64),\n    `datetime_precision` Nullable(UInt64),\n    `character_set_catalog` Nullable(String),\n    `character_set_schema` Nullable(String),\n    `character_set_name` Nullable(String),\n    `collation_catalog` Nullable(String),\n    `collation_schema` Nullable(String),\n    `collation_name` Nullable(String),\n    `domain_catalog` Nullable(String),\n    `domain_schema` Nullable(String),\n    `domain_name` Nullable(String),\n    `column_comment` String,\n    `column_type` String,\n    `TABLE_CATALOG` String,\n    `TABLE_SCHEMA` String,\n    `TABLE_NAME` String,\n    `COLUMN_NAME` String,\n    `ORDINAL_POSITION` UInt64,\n    `COLUMN_DEFAULT` String,\n    `IS_NULLABLE` String,\n    `DATA_TYPE` String,\n    `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),\n    `CHARACTER_OCTET_LENGTH` Nullable(UInt64),\n    `NUMERIC_PRECISION` Nullable(UInt64),\n    `NUMERIC_PRECISION_RADIX` Nullable(UInt64),\n    `NUMERIC_SCALE` Nullable(UInt64),\n    `DATETIME_PRECISION` Nullable(UInt64),\n    `CHARACTER_SET_CATALOG` Nullable(String),\n    `CHARACTER_SET_SCHEMA` Nullable(String),\n    `CHARACTER_SET_NAME` Nullable(String),\n    `COLLATION_CATALOG` Nullable(String),\n    `COLLATION_SCHEMA` Nullable(String),\n    `COLLATION_NAME` Nullable(String),\n    `DOMAIN_CATALOG` Nullable(String),\n    `DOMAIN_SCHEMA` Nullable(String),\n    `DOMAIN_NAME` Nullable(String),\n    `COLUMN_COMMENT` String,\n    `COLUMN_TYPE` String\n) AS\nSELECT\n    database AS table_catalog,\n    database AS table_schema,\n    table AS table_name,\n    name AS column_name,\n    position AS ordinal_position,\n    default_expression AS column_default,\n    type LIKE \'Nullable(%)\' AS is_nullable,\n    type AS data_type,\n    character_octet_length AS character_maximum_length,\n    character_octet_length,\n    numeric_precision,\n    numeric_precision_radix,\n    numeric_scale,\n    datetime_precision,\n    NULL AS character_set_catalog,\n    NULL AS character_set_schema,\n    NULL AS character_set_name,\n    NULL AS collation_catalog,\n    NULL AS collation_schema,\n    NULL AS collation_name,\n    NULL AS domain_catalog,\n    NULL AS domain_schema,\n    NULL AS domain_name,\n    comment AS column_comment,\n    type AS column_type,\n    table_catalog AS TABLE_CATALOG,\n    table_schema AS TABLE_SCHEMA,\n    table_name AS TABLE_NAME,\n    column_name AS COLUMN_NAME,\n    ordinal_position AS ORDINAL_POSITION,\n    column_default AS COLUMN_DEFAULT,\n    is_nullable AS IS_NULLABLE,\n    data_type AS DATA_TYPE,\n    character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,\n    character_octet_length AS CHARACTER_OCTET_LENGTH,\n    numeric_precision AS NUMERIC_PRECISION,\n    numeric_precision_radix AS NUMERIC_PRECISION_RADIX,\n    numeric_scale AS NUMERIC_SCALE,\n    datetime_precision AS DATETIME_PRECISION,\n    character_set_catalog AS CHARACTER_SET_CATALOG,\n    character_set_schema AS CHARACTER_SET_SCHEMA,\n    character_set_name AS CHARACTER_SET_NAME,\n    collation_catalog AS COLLATION_CATALOG,\n    collation_schema AS COLLATION_SCHEMA,\n    collation_name AS COLLATION_NAME,\n    domain_catalog AS DOMAIN_CATALOG,\n    domain_schema AS DOMAIN_SCHEMA,\n    domain_name AS DOMAIN_NAME,\n    column_comment AS COLUMN_COMMENT,\n    column_type AS COLUMN_TYPE\nFROM system.columns
+CREATE VIEW INFORMATION_SCHEMA.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables
+CREATE VIEW INFORMATION_SCHEMA.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables
+CREATE VIEW information_schema.TABLES (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables
+CREATE VIEW information_schema.tables (`table_catalog` String, `table_schema` String, `table_name` String, `table_type` String, `table_collation` Nullable(String), `table_comment` Nullable(String), `TABLE_CATALOG` String, `TABLE_SCHEMA` String, `TABLE_NAME` String, `TABLE_TYPE` String, `TABLE_COLLATION` Nullable(String), `TABLE_COMMENT` Nullable(String)) AS SELECT database AS table_catalog, database AS table_schema, name AS table_name, multiIf(is_temporary, \'LOCAL TEMPORARY\', engine LIKE \'%View\', \'VIEW\', engine LIKE \'System%\', \'SYSTEM VIEW\', has_own_data = 0, \'FOREIGN TABLE\', \'BASE TABLE\') AS table_type, \'utf8mb4_0900_ai_ci\' AS table_collation, comment AS table_comment, table_catalog AS TABLE_CATALOG, table_schema AS TABLE_SCHEMA, table_name AS TABLE_NAME, table_type AS TABLE_TYPE, table_collation AS TABLE_COLLATION, table_comment AS TABLE_COMMENT FROM system.tables

From 4808c34f3b84756b6f0a07befb68decc062b3268 Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 20 Sep 2023 13:45:54 +0000
Subject: [PATCH 165/243] Fix log message

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index b67208ab12a..6c37046c5f2 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -199,7 +199,7 @@ void executeQuery(
     {
         LOG_TRACE(
             log,
-            "Parallel reading from replicas is disabled for cluster. There are no shards with more then 1 replica: cluster={}",
+            "Parallel reading from replicas is disabled for cluster. There are no shards with more than 1 replica: cluster={}",
             cluster->getName());
     }
 

From c9bf365767597874de1d768835581b87ad858f7d Mon Sep 17 00:00:00 2001
From: Igor Nikonov <igor@clickhouse.com>
Date: Wed, 20 Sep 2023 13:47:11 +0000
Subject: [PATCH 166/243] Polishing

---
 src/Interpreters/ClusterProxy/executeQuery.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 6c37046c5f2..84ae382487d 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -193,7 +193,7 @@ void executeQuery(
 
     auto cluster = query_info.getCluster();
     auto new_context = updateSettingsForCluster(*cluster, context, settings, main_table, query_info.additional_filter_ast, log);
-    if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
+    if (context->getSettingsRef().allow_experimental_parallel_reading_from_replicas
         && context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value
            != new_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas.value)
     {

From a1c98bc8c9730a199c15da198198ccceef39d74f Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Wed, 20 Sep 2023 09:50:35 -0400
Subject: [PATCH 167/243] fix build

---
 src/Storages/S3Queue/S3QueueSource.cpp | 5 +++++
 src/Storages/S3Queue/S3QueueSource.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp
index 54a863aeb2c..5f640239985 100644
--- a/src/Storages/S3Queue/S3QueueSource.cpp
+++ b/src/Storages/S3Queue/S3QueueSource.cpp
@@ -147,6 +147,11 @@ StorageS3QueueSource::KeyWithInfo StorageS3QueueSource::QueueGlobIterator::next(
     return KeyWithInfo();
 }
 
+size_t StorageS3QueueSource::QueueGlobIterator::estimatedKeysCount()
+{
+    return keys_buf.size();
+}
+
 StorageS3QueueSource::StorageS3QueueSource(
     const ReadFromFormatInfo & info,
     const String & format_,
diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h
index f89384fb096..0f83ed054d5 100644
--- a/src/Storages/S3Queue/S3QueueSource.h
+++ b/src/Storages/S3Queue/S3QueueSource.h
@@ -54,6 +54,8 @@ public:
         Strings
         filterProcessingFiles(const S3QueueMode & engine_mode, std::unordered_set<String> & exclude_keys, const String & max_file = "");
 
+        size_t estimatedKeysCount() override;
+
     private:
         UInt64 max_poll_size;
         KeysWithInfo keys_buf;

From 3ef2c37b923d1b764ab190d1b3d107d7550647b6 Mon Sep 17 00:00:00 2001
From: kssenii <sumarokovakseniia@mail.ru>
Date: Wed, 20 Sep 2023 15:54:11 +0200
Subject: [PATCH 168/243] Add some logging to StorageRabbitMQ

---
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 116 +++++++++++++---------
 src/Storages/RabbitMQ/StorageRabbitMQ.h   |   1 +
 2 files changed, 68 insertions(+), 49 deletions(-)

diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
index 651c63e1b91..ec552dd1032 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
@@ -959,70 +959,88 @@ bool StorageRabbitMQ::hasDependencies(const StorageID & table_id)
 
 void StorageRabbitMQ::streamingToViewsFunc()
 {
-    chassert(initialized);
-    if (initialized)
+    try
     {
-        try
-        {
-            auto table_id = getStorageID();
-
-            // Check if at least one direct dependency is attached
-            size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size();
-            bool rabbit_connected = connection->isConnected() || connection->reconnect();
-
-            if (num_views && rabbit_connected)
-            {
-                auto start_time = std::chrono::steady_clock::now();
-
-                mv_attached.store(true);
-
-                // Keep streaming as long as there are attached views and streaming is not cancelled
-                while (!shutdown_called && num_created_consumers > 0)
-                {
-                    if (!hasDependencies(table_id))
-                        break;
-
-                    LOG_DEBUG(log, "Started streaming to {} attached views", num_views);
-
-                    bool continue_reading = tryStreamToViews();
-                    if (!continue_reading)
-                        break;
-
-                    auto end_time = std::chrono::steady_clock::now();
-                    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
-                    if (duration.count() > MAX_THREAD_WORK_DURATION_MS)
-                    {
-                        LOG_TRACE(log, "Reschedule streaming. Thread work duration limit exceeded.");
-                        break;
-                    }
-
-                    milliseconds_to_wait = rabbitmq_settings->rabbitmq_empty_queue_backoff_start_ms;
-                }
-            }
-        }
-        catch (...)
-        {
-            tryLogCurrentException(__PRETTY_FUNCTION__);
-        }
+        streamToViewsImpl();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 
     mv_attached.store(false);
 
-    /// If there is no running select, stop the loop which was
-    /// activated by previous select.
-    if (connection->getHandler().loopRunning())
-        stopLoopIfNoReaders();
+    try
+    {
+        /// If there is no running select, stop the loop which was
+        /// activated by previous select.
+        if (connection->getHandler().loopRunning())
+            stopLoopIfNoReaders();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
 
-    if (!shutdown_called)
+    if (shutdown_called)
+    {
+        LOG_DEBUG(log, "Shutdown called, stopping background streaming process");
+    }
+    else
     {
         /// Reschedule with backoff.
         if (milliseconds_to_wait < rabbitmq_settings->rabbitmq_empty_queue_backoff_end_ms)
             milliseconds_to_wait += rabbitmq_settings->rabbitmq_empty_queue_backoff_step_ms;
 
+        LOG_DEBUG(log, "Rescheduling background streaming process in {}", milliseconds_to_wait);
         streaming_task->scheduleAfter(milliseconds_to_wait);
     }
 }
 
+void StorageRabbitMQ::streamToViewsImpl()
+{
+    if (!initialized)
+    {
+        chassert(false);
+        return;
+    }
+
+    auto table_id = getStorageID();
+
+    // Check if at least one direct dependency is attached
+    size_t num_views = DatabaseCatalog::instance().getDependentViews(table_id).size();
+    bool rabbit_connected = connection->isConnected() || connection->reconnect();
+
+    if (num_views && rabbit_connected)
+    {
+        auto start_time = std::chrono::steady_clock::now();
+
+        mv_attached.store(true);
+
+        // Keep streaming as long as there are attached views and streaming is not cancelled
+        while (!shutdown_called && num_created_consumers > 0)
+        {
+            if (!hasDependencies(table_id))
+                break;
+
+            LOG_DEBUG(log, "Started streaming to {} attached views", num_views);
+
+            bool continue_reading = tryStreamToViews();
+            if (!continue_reading)
+                break;
+
+            auto end_time = std::chrono::steady_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
+            if (duration.count() > MAX_THREAD_WORK_DURATION_MS)
+            {
+                LOG_TRACE(log, "Reschedule streaming. Thread work duration limit exceeded.");
+                break;
+            }
+
+            milliseconds_to_wait = rabbitmq_settings->rabbitmq_empty_queue_backoff_start_ms;
+        }
+    }
+}
 
 bool StorageRabbitMQ::tryStreamToViews()
 {
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h
index 2b40c88ba6e..a5ff60f0c6e 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.h
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h
@@ -187,6 +187,7 @@ private:
     void bindExchange(AMQP::TcpChannel & rabbit_channel);
     void bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_channel);
 
+    void streamToViewsImpl();
     /// Return true on successful stream attempt.
     bool tryStreamToViews();
     bool hasDependencies(const StorageID & table_id);

From 6dab5bf3a7571055d887ebe84d37fbb021fac9b4 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio2368@users.noreply.github.com>
Date: Wed, 20 Sep 2023 15:55:12 +0200
Subject: [PATCH 169/243] Better

---
 src/Daemon/BaseDaemon.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 8e01311dcb0..f64d4b365a9 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -103,9 +103,9 @@ static const size_t signal_pipe_buf_size =
     + sizeof(siginfo_t)
     + sizeof(ucontext_t*)
     + sizeof(StackTrace)
+    + sizeof(UInt64)
     + sizeof(UInt32)
-    + sizeof(void*)
-    + sizeof(UInt64);
+    + sizeof(void*);
 
 using signal_function = void(int, siginfo_t*, void*);
 

From 729c8aa29f6436d338f544d923677737f09e0660 Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Wed, 20 Sep 2023 10:41:47 -0400
Subject: [PATCH 170/243] fix glob iterator estimated objects

---
 src/Storages/StorageS3.cpp | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index e99be7a1204..288f5423c00 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -182,9 +182,7 @@ public:
 
     size_t objectsCount()
     {
-        assert(outcome_future.valid());
-        first_outcome = outcome_future.get();
-        return first_outcome->GetResult().GetContents().size();
+        return buffer.size();
     }
 
     ~Impl()
@@ -231,18 +229,8 @@ private:
     void fillInternalBufferAssumeLocked()
     {
         buffer.clear();
-
-        ListObjectsOutcome outcome;
-        if (unlikely(first_outcome))
-        {
-            outcome = std::move(*first_outcome);
-            first_outcome = std::nullopt;
-        }
-        else
-        {
-            assert(outcome_future.valid());
-            outcome = outcome_future.get();
-        }
+        assert(outcome_future.valid());
+        auto outcome = outcome_future.get();
 
         if (!outcome.IsSuccess())
         {
@@ -359,7 +347,6 @@ private:
     ThreadPool list_objects_pool;
     ThreadPoolCallbackRunner<ListObjectsOutcome> list_objects_scheduler;
     std::future<ListObjectsOutcome> outcome_future;
-    std::optional<ListObjectsOutcome> first_outcome;  /// the result will be set by `estimatedKeysCount`
     std::function<void(FileProgress)> file_progress_callback;
 };
 

From 69b36b9c88a9a41c7936c317071b24fce6a47d08 Mon Sep 17 00:00:00 2001
From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
Date: Wed, 20 Sep 2023 16:42:32 +0200
Subject: [PATCH 171/243] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 24a03e283e6..a6e23e92d36 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,7 +31,7 @@
 * Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/outoput using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
 * A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
 * Add new function `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
-* Allow variable number of columns in TSV/CuatomSeprarated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow variable number of columns in TSV/CustomSeprarated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
 * Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)).
 * Allow to execute constant non-deterministic functions in mutations on initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)).
 * Add input format `One` that doesn't read any data and always returns single row with column `dummy` with type `UInt8` and value `0` like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)).

From 2dc2b2050e8afbe6763bf7f00e796c9e4533c6b0 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov <KochetovNicolai@users.noreply.github.com>
Date: Wed, 20 Sep 2023 17:16:50 +0200
Subject: [PATCH 172/243] Update test.py

---
 tests/integration/test_backup_log/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_backup_log/test.py b/tests/integration/test_backup_log/test.py
index d0abd12f787..2bab94226e1 100644
--- a/tests/integration/test_backup_log/test.py
+++ b/tests/integration/test_backup_log/test.py
@@ -34,7 +34,7 @@ def restore_table(backup_name):
 
 def test_backup_log():
     instance.query("SYSTEM FLUSH LOGS")
-    instance.query("truncate table system.backup_log")
+    instance.query("drop table system.backup_log")
 
     backup_name = "File('/backups/test_backup/')"
     assert instance.query("SELECT * FROM system.tables WHERE name = 'backup_log'") == ""

From 859e1a266e09f372c71a4eadad5f30c8e99616a3 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Wed, 20 Sep 2023 17:59:37 +0200
Subject: [PATCH 173/243] toDaysSinceYearZero DateTime/DateTime64 support

---
 src/Functions/DateTimeTransforms.h            | 21 ++++++++++------
 src/Functions/toDaysSinceYearZero.cpp         | 19 +++++++++++++--
 .../02874_toDaysSinceYearZero.reference       | 24 +++++++++++++------
 .../0_stateless/02874_toDaysSinceYearZero.sql | 12 ++++++++--
 4 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index a351d7fdf30..b92067ecdef 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -928,9 +928,7 @@ struct ToDayOfYearImpl
 struct ToDaysSinceYearZeroImpl
 {
 private:
-    /// Constant calculated from MySQL's TO_DAYS() implementation.
-    /// https://github.com/mysql/mysql-server/blob/ea1efa9822d81044b726aab20c857d5e1b7e046a/mysys/my_time.cc#L1042
-    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1900 = 693'961; /// 01 January, each
+    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1970 = 719'528;
 
 public:
     static constexpr auto name = "toDaysSinceYearZero";
@@ -939,18 +937,27 @@ public:
     {
         throwDateTimeIsNotSupported(name);
     }
-    static UInt32 execute(UInt32, const DateLUTImpl &)
+    static UInt32 execute(UInt32 d, const DateLUTImpl &)
     {
-        throwDateTimeIsNotSupported(name);
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + (d / 86400);
     }
     static UInt32 execute(Int32 d, const DateLUTImpl &)
     {
-        return DAYS_BETWEEN_YEARS_0_AND_1900 + d;
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + d;
     }
     static UInt32 execute(UInt16 d, const DateLUTImpl &)
     {
-        return DAYS_BETWEEN_YEARS_0_AND_1900 + d;
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + d;
     }
+    static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
+    {
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t.whole));
+    }
+    static DecimalUtils::DecimalComponents<DateTime64> executeExtendedResult(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
+    {
+        return {time_zone.toDate(t.whole), 0};
+    }
+
     static constexpr bool hasPreimage() { return false; }
 
     using FactorTransform = ZeroTransform;
diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp
index e569c5cb1f3..7aa04fca740 100644
--- a/src/Functions/toDaysSinceYearZero.cpp
+++ b/src/Functions/toDaysSinceYearZero.cpp
@@ -6,6 +6,8 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
+#include "DataTypes/IDataType.h"
+#include "Functions/TransformDateTime64.h"
 
 
 namespace DB
@@ -23,6 +25,7 @@ namespace
 class FunctionToDaysSinceYearZero : public IFunction
 {
     using ResultType = DataTypeUInt32;
+    using Transformer = TransformDateTime64<ToDaysSinceYearZeroImpl>;
 public:
     static constexpr auto name = "toDaysSinceYearZero";
     static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionToDaysSinceYearZero>(context); }
@@ -37,8 +40,12 @@ public:
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
         FunctionArgumentDescriptors mandatory_args{
-            {"date", &isDateOrDate32<IDataType>, nullptr, "Date or Date32"}
-        };
+            {"date",
+             [](const IDataType & dt) {
+                return isDateOrDate32<IDataType>(dt) || isDateTime<IDataType>(dt) || isDateTime64<IDataType>(dt);
+             },
+             nullptr,
+             "Date, Date32, DateTime or DateTime64"}};
 
         validateFunctionArgumentTypes(*this, arguments, mandatory_args);
 
@@ -54,6 +61,14 @@ public:
             return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
         else if (which.isDate32())
             return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
+        else if (which.isDateTime())
+            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
+        else if (which.isDateTime64())
+        {
+            const auto scale = static_cast<const DataTypeDateTime64 *>(from_type)->getScale();
+            const Transformer transformer(scale);
+            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, Transformer>::execute(arguments, result_type, input_rows_count, transformer);
+        }
 
         throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
             "Illegal type {} of argument of function {}",
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
index 885332ab835..1a5fd5695f1 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
@@ -1,13 +1,23 @@
 Invalid parameters
 Const argument
+719528
+739136
+693961
+739136
+739136
+739136
+739136
+739136
 693961
-713569
-668394
-713569
 \N
 Non-const argument
-713569
-713569
+739136
+739136
+739136
+739136
+739136
+739136
+693961
 MySQL alias
-713569
-713569
+739136
+739136
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
index 2c35920e569..99bc507d311 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
@@ -3,19 +3,27 @@ SELECT toDaysSinceYearZero(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH
 SELECT toDaysSinceYearZero(toDate('2023-09-08'), toDate('2023-09-08')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
 SELECT toDaysSinceYearZero('str'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT toDaysSinceYearZero(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11', 3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT 'Const argument';
 SELECT toDaysSinceYearZero(toDate('1970-01-01'));
 SELECT toDaysSinceYearZero(toDate('2023-09-08'));
 SELECT toDaysSinceYearZero(toDate32('1900-01-01'));
 SELECT toDaysSinceYearZero(toDate32('2023-09-08'));
+SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11'));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123', 3));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123', 6));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123123', 9));
+SELECT toDaysSinceYearZero(toDateTime64('1900-01-01 00:00:00.000', 3));
 SELECT toDaysSinceYearZero(NULL);
 
 SELECT 'Non-const argument';
 SELECT toDaysSinceYearZero(materialize(toDate('2023-09-08')));
 SELECT toDaysSinceYearZero(materialize(toDate32('2023-09-08')));
+SELECT toDaysSinceYearZero(materialize(toDateTime('2023-09-08 11:11:11')));
+SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123', 3)));
+SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123123', 6)));
+SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123123123', 9)));
+SELECT toDaysSinceYearZero(materialize(toDateTime64('1900-01-01 00:00:00.000', 3)));
 
 SELECT 'MySQL alias';
 SELECT to_days(toDate('2023-09-08'));

From a68fbe073bc42aae2a773ea80e314200499936fc Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Wed, 20 Sep 2023 18:02:53 +0200
Subject: [PATCH 174/243] Update toDaysSinceYearZero docs

---
 contrib/base64                                |  1 +
 .../functions/date-time-functions.md          |  2 +-
 tests/integration/requirements.txt            | 30 +++++++++++++++++++
 3 files changed, 32 insertions(+), 1 deletion(-)
 create mode 160000 contrib/base64
 create mode 100644 tests/integration/requirements.txt

diff --git a/contrib/base64 b/contrib/base64
new file mode 160000
index 00000000000..8628e258090
--- /dev/null
+++ b/contrib/base64
@@ -0,0 +1 @@
+Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index a3883cd22ce..50c8e4057c4 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -739,7 +739,7 @@ Aliases: `TO_DAYS`
 
 **Arguments**
 
-- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md).
+- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
 
 **Returned value**
 
diff --git a/tests/integration/requirements.txt b/tests/integration/requirements.txt
new file mode 100644
index 00000000000..11efc43eded
--- /dev/null
+++ b/tests/integration/requirements.txt
@@ -0,0 +1,30 @@
+PyMySQL
+aerospike
+avro
+cassandra-driver
+confluent-kafka
+dicttoxml
+docker
+docker-compose
+grpcio
+grpcio-tools
+kafka-python
+kazoo
+minio
+lz4
+protobuf
+psycopg2-binary
+pymongo
+pytz
+pytest
+pytest-timeout
+redis
+tzlocal==2.1
+urllib3
+requests-kerberos
+dict2xml
+hypothesis
+pyhdfs
+pika
+meilisearch
+nats-py

From e2649b5e1db84673408b9e8e4f98e47d452a2d2b Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Wed, 20 Sep 2023 18:08:02 +0200
Subject: [PATCH 175/243] Remove unnecessary function

---
 src/Functions/DateTimeTransforms.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index b92067ecdef..986206429e7 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -953,10 +953,6 @@ public:
     {
         return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t.whole));
     }
-    static DecimalUtils::DecimalComponents<DateTime64> executeExtendedResult(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
-    {
-        return {time_zone.toDate(t.whole), 0};
-    }
 
     static constexpr bool hasPreimage() { return false; }
 

From 44ee98ed8f612aa463e107de28fd8431892dbe33 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Wed, 20 Sep 2023 18:09:25 +0200
Subject: [PATCH 176/243] Remove accidentally commited files

---
 tests/integration/requirements.txt | 30 ------------------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 tests/integration/requirements.txt

diff --git a/tests/integration/requirements.txt b/tests/integration/requirements.txt
deleted file mode 100644
index 11efc43eded..00000000000
--- a/tests/integration/requirements.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-PyMySQL
-aerospike
-avro
-cassandra-driver
-confluent-kafka
-dicttoxml
-docker
-docker-compose
-grpcio
-grpcio-tools
-kafka-python
-kazoo
-minio
-lz4
-protobuf
-psycopg2-binary
-pymongo
-pytz
-pytest
-pytest-timeout
-redis
-tzlocal==2.1
-urllib3
-requests-kerberos
-dict2xml
-hypothesis
-pyhdfs
-pika
-meilisearch
-nats-py

From 3e08800cb5b21a3caa04125a0fecbeab772f2cde Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Tue, 19 Sep 2023 18:14:42 +0000
Subject: [PATCH 177/243] Forbid special columns for file/s3/url/... storages,
 fix insert into ephemeral columns from files

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp     |  9 +++++++-
 src/Client/ClientBase.cpp                     |  7 +++++-
 src/Interpreters/Context.cpp                  | 12 ++++++++--
 .../Formats/Impl/AvroRowInputFormat.cpp       |  2 ++
 src/Storages/ColumnsDescription.cpp           |  8 +++++++
 src/Storages/ColumnsDescription.h             |  2 ++
 src/Storages/HDFS/StorageHDFS.cpp             |  5 +++++
 src/Storages/StorageAzureBlob.cpp             |  5 +++++
 src/Storages/StorageFile.cpp                  |  5 +++++
 src/Storages/StorageS3.cpp                    |  5 +++++
 src/Storages/StorageURL.cpp                   |  5 +++++
 ...2885_ephemeral_columns_from_file.reference | 13 +++++++++++
 .../02885_ephemeral_columns_from_file.sh      | 22 +++++++++++++++++++
 13 files changed, 96 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02885_ephemeral_columns_from_file.reference
 create mode 100755 tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 12d9051ac57..4260d7bd660 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -6247,7 +6247,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                                               .getTable(insertion_table, scope_context)
                                               ->getInMemoryMetadataPtr()
                                               ->getColumns();
-            const auto & insert_column_names = scope_context->hasInsertionTableColumnNames() ? *scope_context->getInsertionTableColumnNames() : insert_columns.getInsertable().getNames();
+            const auto & insert_column_names = scope_context->hasInsertionTableColumnNames() ? *scope_context->getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
             DB::ColumnsDescription structure_hint;
 
             bool use_columns_from_insert_query = true;
@@ -6282,6 +6282,8 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
 
                         ColumnDescription column = insert_columns.get(*insert_column_name_it);
                         column.name = identifier_node->getIdentifier().getFullName();
+                        /// Change ephemeral columns to default columns.
+                        column.default_desc.kind = ColumnDefaultKind::Default;
                         structure_hint.add(std::move(column));
                     }
 
@@ -6356,7 +6358,12 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
                     if (asterisk)
                     {
                         for (; insert_column_name_it != insert_column_names_end; ++insert_column_name_it)
+                        {
+                            ColumnDescription column = insert_columns.get(*insert_column_name_it);
+                            /// Change ephemeral columns to default columns.
+                            column.default_desc.kind = ColumnDefaultKind::Default;
                             structure_hint.add(insert_columns.get(*insert_column_name_it));
+                        }
                     }
 
                     if (!structure_hint.empty())
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 9e86f30b691..74969931056 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1475,13 +1475,18 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
             current_format = FormatFactory::instance().getFormatFromFileName(in_file, true);
 
         /// Create temporary storage file, to support globs and parallel reading
+        /// StorageFile doesn't support ephemeral columns, change them all to ordinary.
+        ColumnsDescription columns_for_storage_file = columns_description_for_query;
+        for (const auto & [name, _] : columns_for_storage_file.getEphemeral())
+            columns_for_storage_file.modify(name, [](ColumnDescription & column){ column.default_desc.kind = ColumnDefaultKind::Default; });
+
         StorageFile::CommonArguments args{
             WithContext(global_context),
             parsed_insert_query->table_id,
             current_format,
             getFormatSettings(global_context),
             compression_method,
-            columns_description_for_query,
+            columns_for_storage_file,
             ConstraintsDescription{},
             String{},
             {},
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a1bc6cf94ee..fe0b77a73aa 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1592,7 +1592,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                                               ->getInMemoryMetadataPtr()
                                               ->getColumns();
 
-            const auto & insert_column_names = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getInsertable().getNames();
+            const auto & insert_column_names = hasInsertionTableColumnNames() ? *getInsertionTableColumnNames() : insert_columns.getOrdinary().getNames();
             DB::ColumnsDescription structure_hint;
 
             bool use_columns_from_insert_query = true;
@@ -1626,6 +1626,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
 
                         ColumnDescription column = insert_columns.get(*insert_column_name_it);
                         column.name = identifier->name();
+                        /// Change ephemeral columns to default columns.
+                        column.default_desc.kind = ColumnDefaultKind::Default;
                         structure_hint.add(std::move(column));
                     }
 
@@ -1700,7 +1702,13 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
                     if (asterisk)
                     {
                         for (; insert_column_name_it != insert_column_names_end; ++insert_column_name_it)
-                            structure_hint.add(insert_columns.get(*insert_column_name_it));
+                        {
+                            ColumnDescription column = insert_columns.get(*insert_column_name_it);
+                            /// Change ephemeral columns to default columns.
+                            column.default_desc.kind = ColumnDefaultKind::Default;
+
+                            structure_hint.add(std::move(column));
+                        }
                     }
 
                     if (!structure_hint.empty())
diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
index 1046125c16c..33f108f5ad7 100644
--- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp
@@ -1263,6 +1263,8 @@ void registerInputFormatAvro(FormatFactory & factory)
         return std::make_shared<AvroRowInputFormat>(sample, buf, params, settings);
     });
 
+    factory.markFormatSupportsSubsetOfColumns("Avro");
+
     factory.registerInputFormat("AvroConfluent",[](
         ReadBuffer & buf,
         const Block & sample,
diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp
index 9fa79387d5c..0d5508b8164 100644
--- a/src/Storages/ColumnsDescription.cpp
+++ b/src/Storages/ColumnsDescription.cpp
@@ -680,6 +680,14 @@ bool ColumnsDescription::hasDefaults() const
     return false;
 }
 
+bool ColumnsDescription::hasOnlyOrdinary() const
+{
+    for (const auto & column : columns)
+        if (column.default_desc.kind != ColumnDefaultKind::Default)
+            return false;
+    return true;
+}
+
 ColumnDefaults ColumnsDescription::getDefaults() const
 {
     ColumnDefaults ret;
diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h
index ee0bb5efb66..2d7536765ff 100644
--- a/src/Storages/ColumnsDescription.h
+++ b/src/Storages/ColumnsDescription.h
@@ -179,6 +179,8 @@ public:
     bool hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const;
     bool hasColumnOrNested(GetColumnsOptions::Kind kind, const String & column_name) const;
 
+    bool hasOnlyOrdinary() const;
+
     NameAndTypePair getPhysical(const String & column_name) const;
     NameAndTypePair getColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const;
     NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const;
diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 605942331eb..607e09241bf 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -306,7 +306,12 @@ StorageHDFS::StorageHDFS(
         storage_metadata.setColumns(columns);
     }
     else
+    {
+        /// We don't allow special columns in HDFS storage.
+        if (!columns_.hasOnlyOrdinary())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
         storage_metadata.setColumns(columns_);
+    }
 
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp
index fc94eccfb17..9ad76657024 100644
--- a/src/Storages/StorageAzureBlob.cpp
+++ b/src/Storages/StorageAzureBlob.cpp
@@ -478,7 +478,12 @@ StorageAzureBlob::StorageAzureBlob(
         storage_metadata.setColumns(columns);
     }
     else
+    {
+        /// We don't allow special columns in File storage.
+        if (!columns_.hasOnlyOrdinary())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
         storage_metadata.setColumns(columns_);
+    }
 
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b44b7789135..e895bac6335 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -930,7 +930,12 @@ void StorageFile::setStorageMetadata(CommonArguments args)
         storage_metadata.setColumns(columns);
     }
     else
+    {
+        /// We don't allow special columns in File storage.
+        if (!args.columns.hasOnlyOrdinary())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine File doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
         storage_metadata.setColumns(args.columns);
+    }
 
     storage_metadata.setConstraints(args.constraints);
     storage_metadata.setComment(args.comment);
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index f320fee5987..2ccbfd4950c 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -953,7 +953,12 @@ StorageS3::StorageS3(
         storage_metadata.setColumns(columns);
     }
     else
+    {
+        /// We don't allow special columns in S3 storage.
+        if (!columns_.hasOnlyOrdinary())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
         storage_metadata.setColumns(columns_);
+    }
 
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index a0f5379a1fd..4d313b9e432 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -129,7 +129,12 @@ IStorageURLBase::IStorageURLBase(
         storage_metadata.setColumns(columns);
     }
     else
+    {
+        /// We don't allow special columns in URL storage.
+        if (!columns_.hasOnlyOrdinary())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine URL doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL");
         storage_metadata.setColumns(columns_);
+    }
 
     storage_metadata.setConstraints(constraints_);
     storage_metadata.setComment(comment);
diff --git a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.reference b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.reference
new file mode 100644
index 00000000000..66328b152ab
--- /dev/null
+++ b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.reference
@@ -0,0 +1,13 @@
+1
+1
+1
+1
+2
+3
+4
+5
+1
+2
+3
+4
+5
diff --git a/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh
new file mode 100755
index 00000000000..2917ec86957
--- /dev/null
+++ b/tests/queries/0_stateless/02885_ephemeral_columns_from_file.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+$CLICKHOUSE_LOCAL -q "select number as x from numbers(5) format JSONEachRow" > $CLICKHOUSE_TEST_UNIQUE_NAME.jsonl
+$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Ephemeral, y UInt64 default x + 1')" 2>&1 | grep -c "BAD_ARGUMENTS"
+$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Alias y, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS"
+$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl', auto, 'x UInt64 Materialized 42, y UInt64')" 2>&1 | grep -c "BAD_ARGUMENTS"
+
+$CLICKHOUSE_LOCAL -n -q "
+    create table test (x UInt64 Ephemeral, y UInt64 default x + 1) engine=Memory;
+    insert into test (x, y) select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl');
+    select * from test;
+    truncate table test;
+    insert into test (x, y) from infile '$CLICKHOUSE_TEST_UNIQUE_NAME.jsonl';
+    select * from test
+"
+
+rm  $CLICKHOUSE_TEST_UNIQUE_NAME.jsonl
+

From 520c9e95a99fce92a69ade796810b841a6ce0cc1 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Wed, 20 Sep 2023 16:25:47 +0000
Subject: [PATCH 178/243] Fix

---
 src/Client/ClientBase.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 74969931056..4b429c32bba 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1475,10 +1475,15 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
             current_format = FormatFactory::instance().getFormatFromFileName(in_file, true);
 
         /// Create temporary storage file, to support globs and parallel reading
-        /// StorageFile doesn't support ephemeral columns, change them all to ordinary.
-        ColumnsDescription columns_for_storage_file = columns_description_for_query;
-        for (const auto & [name, _] : columns_for_storage_file.getEphemeral())
-            columns_for_storage_file.modify(name, [](ColumnDescription & column){ column.default_desc.kind = ColumnDefaultKind::Default; });
+        /// StorageFile doesn't support ephemeral/materialized/alias columns.
+        /// We should change ephemeral columns to ordinary and ignore materialized/alias columns.
+        ColumnsDescription columns_for_storage_file;
+        for (const auto & [name, _] : columns_description_for_query.getInsertable())
+        {
+            ColumnDescription column = columns_description_for_query.get(name);
+            column.default_desc.kind = ColumnDefaultKind::Default;
+            columns_for_storage_file.add(std::move(column));
+        }
 
         StorageFile::CommonArguments args{
             WithContext(global_context),

From c706101891dc491fab08de3a62d959e2fd19d8e4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Fri, 11 Aug 2023 14:29:37 +0200
Subject: [PATCH 179/243] Fix throttling of BACKUPs from/to S3 (in case native
 copy was not used)

In some cases native copy is not possible, and such requests should be
throttled.

v0: copyS3FileNativeWithFallback
v2: revert v0 and pass write_settings
v3: pass read_settings to copyFile()
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
---
 programs/disks/CommandCopy.cpp                |  2 +-
 src/Backups/BackupIO_Disk.cpp                 |  4 +-
 src/Backups/BackupIO_S3.cpp                   |  2 +
 src/Disks/DiskEncrypted.cpp                   |  6 +--
 src/Disks/DiskEncrypted.h                     |  2 +-
 src/Disks/DiskEncryptedTransaction.cpp        |  4 +-
 src/Disks/DiskEncryptedTransaction.h          |  2 +-
 src/Disks/DiskLocal.cpp                       |  6 +--
 src/Disks/DiskLocal.h                         |  2 +-
 src/Disks/FakeDiskTransaction.h               |  4 +-
 src/Disks/IDisk.cpp                           | 24 +++++------
 src/Disks/IDisk.h                             |  7 ++--
 src/Disks/IDiskTransaction.h                  |  6 ++-
 .../AzureBlobStorage/AzureObjectStorage.cpp   |  2 +
 .../AzureBlobStorage/AzureObjectStorage.h     |  2 +
 .../Cached/CachedObjectStorage.cpp            | 12 ++++--
 .../Cached/CachedObjectStorage.h              |  4 ++
 .../ObjectStorages/DiskObjectStorage.cpp      |  7 ++--
 src/Disks/ObjectStorages/DiskObjectStorage.h  |  3 +-
 ...jectStorageRemoteMetadataRestoreHelper.cpp |  6 +--
 ...ObjectStorageRemoteMetadataRestoreHelper.h |  4 +-
 .../DiskObjectStorageTransaction.cpp          | 17 +++++---
 .../DiskObjectStorageTransaction.h            |  2 +-
 .../ObjectStorages/HDFS/HDFSObjectStorage.cpp |  6 ++-
 .../ObjectStorages/HDFS/HDFSObjectStorage.h   |  2 +
 src/Disks/ObjectStorages/IObjectStorage.cpp   |  8 ++--
 src/Disks/ObjectStorages/IObjectStorage.h     |  4 ++
 .../Local/LocalObjectStorage.cpp              | 10 +++--
 .../ObjectStorages/Local/LocalObjectStorage.h |  2 +
 .../ObjectStorages/S3/S3ObjectStorage.cpp     | 42 +++++++++++++++----
 src/Disks/ObjectStorages/S3/S3ObjectStorage.h |  4 ++
 .../ObjectStorages/Web/WebObjectStorage.cpp   |  2 +-
 .../ObjectStorages/Web/WebObjectStorage.h     |  2 +
 src/IO/S3/copyS3File.cpp                      | 10 +++--
 src/IO/S3/copyS3File.h                        |  3 ++
 .../MergeTree/DataPartStorageOnDiskBase.cpp   |  9 ++--
 .../MergeTree/DataPartStorageOnDiskBase.h     |  4 +-
 src/Storages/MergeTree/IDataPartStorage.h     |  4 +-
 src/Storages/MergeTree/IMergeTreeDataPart.cpp |  5 ++-
 src/Storages/MergeTree/IMergeTreeDataPart.h   |  2 +-
 src/Storages/MergeTree/MergeTreeData.cpp      | 20 +++++----
 src/Storages/MergeTree/MergeTreeData.h        |  5 ++-
 .../MergeTree/MergeTreePartsMover.cpp         |  6 +--
 src/Storages/MergeTree/MergeTreePartsMover.h  |  2 +-
 src/Storages/MergeTree/MutateTask.cpp         |  9 +++-
 src/Storages/MergeTree/localBackup.cpp        | 25 +++++------
 src/Storages/MergeTree/localBackup.h          |  3 +-
 src/Storages/StorageMergeTree.cpp             | 19 ++++++++-
 src/Storages/StorageReplicatedMergeTree.cpp   | 35 ++++++++++++++--
 .../02844_max_backup_bandwidth_s3.reference   |  2 +
 .../02844_max_backup_bandwidth_s3.sh          | 36 ++++++++++++++++
 51 files changed, 299 insertions(+), 112 deletions(-)
 create mode 100644 tests/queries/0_stateless/02844_max_backup_bandwidth_s3.reference
 create mode 100755 tests/queries/0_stateless/02844_max_backup_bandwidth_s3.sh

diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp
index 4a7af1ced29..296fc708411 100644
--- a/programs/disks/CommandCopy.cpp
+++ b/programs/disks/CommandCopy.cpp
@@ -57,7 +57,7 @@ public:
         String relative_path_from = validatePathAndGetAsRelative(path_from);
         String relative_path_to = validatePathAndGetAsRelative(path_to);
 
-        disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* settings= */ {});
+        disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to, /* read_settings= */ {}, /* write_settings= */ {});
     }
 };
 }
diff --git a/src/Backups/BackupIO_Disk.cpp b/src/Backups/BackupIO_Disk.cpp
index 21b3afbddf8..1e260ad22d9 100644
--- a/src/Backups/BackupIO_Disk.cpp
+++ b/src/Backups/BackupIO_Disk.cpp
@@ -46,7 +46,7 @@ void BackupReaderDisk::copyFileToDisk(const String & path_in_backup, size_t file
         {
             /// Use more optimal way.
             LOG_TRACE(log, "Copying file {} from disk {} to disk {}", path_in_backup, disk->getName(), destination_disk->getName());
-            disk->copyFile(root_path / path_in_backup, *destination_disk, destination_path, write_settings);
+            disk->copyFile(root_path / path_in_backup, *destination_disk, destination_path, read_settings, write_settings);
             return; /// copied!
         }
     }
@@ -119,7 +119,7 @@ void BackupWriterDisk::copyFileFromDisk(const String & path_in_backup, DiskPtr s
             LOG_TRACE(log, "Copying file {} from disk {} to disk {}", src_path, src_disk->getName(), disk->getName());
             auto dest_file_path = root_path / path_in_backup;
             disk->createDirectories(dest_file_path.parent_path());
-            src_disk->copyFile(src_path, *disk, dest_file_path, write_settings);
+            src_disk->copyFile(src_path, *disk, dest_file_path, read_settings, write_settings);
             return; /// copied!
         }
     }
diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
index ef820784bdf..5b08683b157 100644
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@@ -170,6 +170,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
                 /* dest_bucket= */ blob_path[1],
                 /* dest_key= */ blob_path[0],
                 request_settings,
+                read_settings,
                 object_attributes,
                 threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupReaderS3"),
                 /* for_disk_s3= */ true);
@@ -230,6 +231,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
                 s3_uri.bucket,
                 fs::path(s3_uri.key) / path_in_backup,
                 request_settings,
+                read_settings,
                 {},
                 threadPoolCallbackRunner<void>(getBackupsIOThreadPool().get(), "BackupWriterS3"));
             return; /// copied!
diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp
index ca7cbf443f2..7bc7c1c7dc4 100644
--- a/src/Disks/DiskEncrypted.cpp
+++ b/src/Disks/DiskEncrypted.cpp
@@ -324,7 +324,7 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes)
 }
 
 
-void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings)
+void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     /// Check if we can copy the file without deciphering.
     if (isSameDiskType(*this, *to_disk))
@@ -340,14 +340,14 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha
                 auto wrapped_from_path = wrappedPath(from_dir);
                 auto to_delegate = to_disk_enc->delegate;
                 auto wrapped_to_path = to_disk_enc->wrappedPath(to_dir);
-                delegate->copyDirectoryContent(wrapped_from_path, to_delegate, wrapped_to_path, settings);
+                delegate->copyDirectoryContent(wrapped_from_path, to_delegate, wrapped_to_path, read_settings, write_settings);
                 return;
             }
         }
     }
 
     /// Copy the file through buffers with deciphering.
-    IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, settings);
+    IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, read_settings, write_settings);
 }
 
 std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h
index 2252e4f43f5..8b4461a8dee 100644
--- a/src/Disks/DiskEncrypted.h
+++ b/src/Disks/DiskEncrypted.h
@@ -112,7 +112,7 @@ public:
         delegate->listFiles(wrapped_path, file_names);
     }
 
-    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings) override;
+    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings) override;
 
     std::unique_ptr<ReadBufferFromFileBase> readFile(
         const String & path,
diff --git a/src/Disks/DiskEncryptedTransaction.cpp b/src/Disks/DiskEncryptedTransaction.cpp
index 3fd2085f9cc..daeab7aae6c 100644
--- a/src/Disks/DiskEncryptedTransaction.cpp
+++ b/src/Disks/DiskEncryptedTransaction.cpp
@@ -53,11 +53,11 @@ String DiskEncryptedSettings::findKeyByFingerprint(UInt128 key_fingerprint, cons
     return it->second;
 }
 
-void DiskEncryptedTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings)
+void DiskEncryptedTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     auto wrapped_from_path = wrappedPath(from_file_path);
     auto wrapped_to_path = wrappedPath(to_file_path);
-    delegate_transaction->copyFile(wrapped_from_path, wrapped_to_path, settings);
+    delegate_transaction->copyFile(wrapped_from_path, wrapped_to_path, read_settings, write_settings);
 }
 
 std::unique_ptr<WriteBufferFromFileBase> DiskEncryptedTransaction::writeFile( // NOLINT
diff --git a/src/Disks/DiskEncryptedTransaction.h b/src/Disks/DiskEncryptedTransaction.h
index 70ed1f469ef..6cb2941cc11 100644
--- a/src/Disks/DiskEncryptedTransaction.h
+++ b/src/Disks/DiskEncryptedTransaction.h
@@ -116,7 +116,7 @@ public:
     /// but it's impossible to implement correctly in transactions because other disk can
     /// use different metadata storage.
     /// TODO: maybe remove it at all, we don't want copies
-    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override;
+    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings) override;
 
     /// Open the file for write and return WriteBufferFromFileBase object.
     std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp
index aaa22655f7b..c71f6f81de2 100644
--- a/src/Disks/DiskLocal.cpp
+++ b/src/Disks/DiskLocal.cpp
@@ -432,13 +432,13 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another)
     return typeid(one) == typeid(another);
 }
 
-void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings)
+void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     /// If throttling was configured we cannot use copying directly.
-    if (isSameDiskType(*this, *to_disk) && !settings.local_throttler)
+    if (isSameDiskType(*this, *to_disk) && !read_settings.local_throttler && !write_settings.local_throttler)
         fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
     else
-        IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, settings);
+        IDisk::copyDirectoryContent(from_dir, to_disk, to_dir, read_settings, write_settings);
 }
 
 SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h
index 197f6bb9367..c52c192d824 100644
--- a/src/Disks/DiskLocal.h
+++ b/src/Disks/DiskLocal.h
@@ -65,7 +65,7 @@ public:
 
     void replaceFile(const String & from_path, const String & to_path) override;
 
-    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings) override;
+    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings) override;
 
     void listFiles(const String & path, std::vector<String> & file_names) const override;
 
diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h
index 440ee6271e9..f83642eee56 100644
--- a/src/Disks/FakeDiskTransaction.h
+++ b/src/Disks/FakeDiskTransaction.h
@@ -54,9 +54,9 @@ public:
         disk.replaceFile(from_path, to_path);
     }
 
-    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override
+    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings) override
     {
-        disk.copyFile(from_file_path, disk, to_file_path, settings);
+        disk.copyFile(from_file_path, disk, to_file_path, read_settings, write_settings);
     }
 
     std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp
index 5b9f1208622..1997ce06990 100644
--- a/src/Disks/IDisk.cpp
+++ b/src/Disks/IDisk.cpp
@@ -24,13 +24,13 @@ bool IDisk::isDirectoryEmpty(const String & path) const
     return !iterateDirectory(path)->isValid();
 }
 
-void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path, const WriteSettings & settings) /// NOLINT
+void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const String & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings) /// NOLINT
 {
     LOG_DEBUG(&Poco::Logger::get("IDisk"), "Copying from {} (path: {}) {} to {} (path: {}) {}.",
               getName(), getPath(), from_file_path, to_disk.getName(), to_disk.getPath(), to_file_path);
 
-    auto in = readFile(from_file_path);
-    auto out = to_disk.writeFile(to_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, settings);
+    auto in = readFile(from_file_path, read_settings);
+    auto out = to_disk.writeFile(to_file_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, write_settings);
     copyData(*in, *out);
     out->finalize();
 }
@@ -80,7 +80,7 @@ UInt128 IDisk::getEncryptedFileIV(const String &) const
 
 using ResultsCollector = std::vector<std::future<void>>;
 
-void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
+void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     if (from_disk.isFile(from_path))
     {
@@ -88,7 +88,7 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
         auto future = promise->get_future();
 
         pool.scheduleOrThrowOnError(
-            [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]()
+            [&from_disk, from_path, &to_disk, to_path, &read_settings, &write_settings, promise, thread_group = CurrentThread::getGroup()]()
             {
                 try
                 {
@@ -97,7 +97,7 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
                     if (thread_group)
                         CurrentThread::attachToGroup(thread_group);
 
-                    from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
+                    from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), read_settings, write_settings);
                     promise->set_value();
                 }
                 catch (...)
@@ -119,19 +119,19 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
         }
 
         for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
-            asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, settings);
+            asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, read_settings, write_settings);
     }
 }
 
-void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir, WriteSettings settings)
+void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir, const ReadSettings & read_settings, WriteSettings write_settings)
 {
     ResultsCollector results;
 
     /// Disable parallel write. We already copy in parallel.
     /// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage
-    settings.s3_allow_parallel_part_upload = false;
+    write_settings.s3_allow_parallel_part_upload = false;
 
-    asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, settings);
+    asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, read_settings, write_settings);
 
     for (auto & result : results)
         result.wait();
@@ -140,12 +140,12 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<I
 }
 
 
-void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings)
+void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     if (!to_disk->exists(to_dir))
         to_disk->createDirectories(to_dir);
 
-    copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir= */ false, settings);
+    copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir= */ false, read_settings, write_settings);
 }
 
 void IDisk::truncateFile(const String &, size_t)
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index fc4eaec428c..bfb418e1c5e 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -193,14 +193,15 @@ public:
     virtual void replaceFile(const String & from_path, const String & to_path) = 0;
 
     /// Recursively copy files from from_dir to to_dir. Create to_dir if not exists.
-    virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const WriteSettings & settings);
+    virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir, const ReadSettings & read_settings, const WriteSettings & write_settings);
 
     /// Copy file `from_file_path` to `to_file_path` located at `to_disk`.
     virtual void copyFile( /// NOLINT
         const String & from_file_path,
         IDisk & to_disk,
         const String & to_file_path,
-        const WriteSettings & settings = {});
+        const ReadSettings & read_settings = {},
+        const WriteSettings & write_settings = {});
 
     /// List files at `path` and add their names to `file_names`
     virtual void listFiles(const String & path, std::vector<String> & file_names) const = 0;
@@ -470,7 +471,7 @@ protected:
     /// Base implementation of the function copy().
     /// It just opens two files, reads data by portions from the first file, and writes it to the second one.
     /// A derived class may override copy() to provide a faster implementation.
-    void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir, WriteSettings settings);
+    void copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir, const ReadSettings & read_settings, WriteSettings write_settings);
 
     virtual void checkAccessImpl(const String & path);
 
diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h
index 9f18206a4ad..975c41cb70b 100644
--- a/src/Disks/IDiskTransaction.h
+++ b/src/Disks/IDiskTransaction.h
@@ -59,7 +59,11 @@ public:
     /// but it's impossible to implement correctly in transactions because other disk can
     /// use different metadata storage.
     /// TODO: maybe remove it at all, we don't want copies
-    virtual void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings = {}) = 0;
+    virtual void copyFile(
+        const std::string & from_file_path,
+        const std::string & to_file_path,
+        const ReadSettings & read_settings = {},
+        const WriteSettings & write_settings = {}) = 0;
 
     /// Open the file for write and return WriteBufferFromFileBase object.
     virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index f76fbd45736..73be834c1bb 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -357,6 +357,8 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c
 void AzureObjectStorage::copyObject( /// NOLINT
     const StoredObject & object_from,
     const StoredObject & object_to,
+    const ReadSettings &,
+    const WriteSettings &,
     std::optional<ObjectAttributes> object_to_attributes)
 {
     auto client_ptr = client.get();
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
index b5f81cef235..5436860818c 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
@@ -100,6 +100,8 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void shutdown() override {}
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
index 0da572a06ab..d94c26f27e8 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@@ -160,16 +160,22 @@ void CachedObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
 void CachedObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
     const StoredObject & object_from,
     const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     IObjectStorage & object_storage_to,
     std::optional<ObjectAttributes> object_to_attributes)
 {
-    object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, object_storage_to, object_to_attributes);
+    object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, object_storage_to, object_to_attributes);
 }
 
 void CachedObjectStorage::copyObject( // NOLINT
-    const StoredObject & object_from, const StoredObject & object_to, std::optional<ObjectAttributes> object_to_attributes)
+    const StoredObject & object_from,
+    const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
+    std::optional<ObjectAttributes> object_to_attributes)
 {
-    object_storage->copyObject(object_from, object_to, object_to_attributes);
+    object_storage->copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
 }
 
 std::unique_ptr<IObjectStorage> CachedObjectStorage::cloneObjectStorage(
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
index 76f16c9d930..925abbc6932 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
@@ -57,11 +57,15 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void copyObjectToAnotherObjectStorage( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         IObjectStorage & object_storage_to,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
index 466a1d3d5dd..734482ae851 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@@ -68,7 +68,7 @@ DiskObjectStorage::DiskObjectStorage(
     , send_metadata(config.getBool(config_prefix + ".send_metadata", false))
     , read_resource_name(config.getString(config_prefix + ".read_resource", ""))
     , write_resource_name(config.getString(config_prefix + ".write_resource", ""))
-    , metadata_helper(std::make_unique<DiskObjectStorageRemoteMetadataRestoreHelper>(this, ReadSettings{}))
+    , metadata_helper(std::make_unique<DiskObjectStorageRemoteMetadataRestoreHelper>(this, ReadSettings{}, WriteSettings{}))
 {}
 
 StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) const
@@ -180,7 +180,8 @@ void DiskObjectStorage::copyFile( /// NOLINT
     const String & from_file_path,
     IDisk & to_disk,
     const String & to_file_path,
-    const WriteSettings & settings)
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings)
 {
     if (this == &to_disk)
     {
@@ -192,7 +193,7 @@ void DiskObjectStorage::copyFile( /// NOLINT
     else
     {
         /// Copy through buffers
-        IDisk::copyFile(from_file_path, to_disk, to_file_path, settings);
+        IDisk::copyFile(from_file_path, to_disk, to_file_path, read_settings, write_settings);
     }
 }
 
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h
index 72103edd77e..ccd7e807513 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.h
@@ -162,7 +162,8 @@ public:
         const String & from_file_path,
         IDisk & to_disk,
         const String & to_file_path,
-        const WriteSettings & settings = {}) override;
+        const ReadSettings & read_settings = {},
+        const WriteSettings & write_settings = {}) override;
 
     void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override;
 
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
index bbcdd40d85f..91e15547068 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
@@ -84,7 +84,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::saveSchemaVersion(const int &
 {
     StoredObject object{fs::path(disk->object_storage_root_path) / SCHEMA_VERSION_OBJECT};
 
-    auto buf = disk->object_storage->writeObject(object, WriteMode::Rewrite);
+    auto buf = disk->object_storage->writeObject(object, WriteMode::Rewrite, /* attributes= */ {}, /* buf_size= */ DBMS_DEFAULT_BUFFER_SIZE, write_settings);
     writeIntText(version, *buf);
     buf->finalize();
 
@@ -93,7 +93,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::saveSchemaVersion(const int &
 void DiskObjectStorageRemoteMetadataRestoreHelper::updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const
 {
     StoredObject object{key};
-    disk->object_storage->copyObject(object, object, metadata);
+    disk->object_storage->copyObject(object, object, read_settings, write_settings, metadata);
 }
 
 void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema(const String & path) const
@@ -434,7 +434,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
 
         /// Copy object if we restore to different bucket / path.
         if (source_object_storage->getObjectsNamespace() != disk->object_storage->getObjectsNamespace() || disk->object_storage_root_path != source_path)
-            source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, *disk->object_storage);
+            source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage);
 
         auto tx = disk->metadata_storage->createTransaction();
         tx->addBlobToMetadata(path, relative_key, meta.size_bytes);
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h
index e7de4afcaf3..ee81e8a209e 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h
@@ -24,9 +24,10 @@ public:
     static constexpr UInt64 LATEST_REVISION = std::numeric_limits<UInt64>::max();
     static constexpr UInt64 UNKNOWN_REVISION = 0;
 
-    DiskObjectStorageRemoteMetadataRestoreHelper(DiskObjectStorage * disk_, ReadSettings read_settings_)
+    DiskObjectStorageRemoteMetadataRestoreHelper(DiskObjectStorage * disk_, ReadSettings read_settings_, WriteSettings write_settings_)
         : disk(disk_)
         , read_settings(std::move(read_settings_))
+        , write_settings(std::move(write_settings_))
         , operation_log_suffix("-" + getFQDNOrHostName())
     {
     }
@@ -94,6 +95,7 @@ private:
     ObjectStoragePtr object_storage_from_another_namespace;
 
     ReadSettings read_settings;
+    WriteSettings write_settings;
 
     String operation_log_suffix;
 };
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
index fd01caacd25..99cbd234e08 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
@@ -1,6 +1,7 @@
 #include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
 #include <Disks/ObjectStorages/DiskObjectStorage.h>
 #include <Disks/IO/WriteBufferWithFinalizeCallback.h>
+#include <Interpreters/Context.h>
 #include <Common/checkStackSize.h>
 #include <ranges>
 #include <Common/logger_useful.h>
@@ -474,6 +475,9 @@ struct WriteFileObjectStorageOperation final : public IDiskObjectStorageOperatio
 
 struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation
 {
+    ReadSettings read_settings;
+    WriteSettings write_settings;
+
     /// Local paths
     std::string from_path;
     std::string to_path;
@@ -483,9 +487,13 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation
     CopyFileObjectStorageOperation(
         IObjectStorage & object_storage_,
         IMetadataStorage & metadata_storage_,
+        const ReadSettings & read_settings_,
+        const WriteSettings & write_settings_,
         const std::string & from_path_,
         const std::string & to_path_)
         : IDiskObjectStorageOperation(object_storage_, metadata_storage_)
+        , read_settings(read_settings_)
+        , write_settings(write_settings_)
         , from_path(from_path_)
         , to_path(to_path_)
     {}
@@ -505,7 +513,7 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation
             std::string blob_name = object_storage.generateBlobNameForPath(to_path);
             auto object_to = StoredObject(fs::path(metadata_storage.getObjectStorageRootPath()) / blob_name);
 
-            object_storage.copyObject(object_from, object_to);
+            object_storage.copyObject(object_from, object_to, read_settings, write_settings);
 
             tx->addBlobToMetadata(to_path, blob_name, object_from.bytes_size);
 
@@ -810,13 +818,10 @@ void DiskObjectStorageTransaction::createFile(const std::string & path)
         }));
 }
 
-void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings)
+void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
-    /// NOTE: For native copy we can ignore throttling, so no need to use WriteSettings
-    UNUSED(settings);
-
     operations_to_execute.emplace_back(
-        std::make_unique<CopyFileObjectStorageOperation>(object_storage, metadata_storage, from_file_path, to_file_path));
+        std::make_unique<CopyFileObjectStorageOperation>(object_storage, metadata_storage, read_settings, write_settings, from_file_path, to_file_path));
 }
 
 void DiskObjectStorageTransaction::commit()
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
index 8ce10dad212..4b62a41e161 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
@@ -86,7 +86,7 @@ public:
 
     void createFile(const String & path) override;
 
-    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const WriteSettings & settings) override;
+    void copyFile(const std::string & from_file_path, const std::string & to_file_path, const ReadSettings & read_settings, const WriteSettings &) override;
 
     /// writeFile is a difficult function for transactions.
     /// Now it's almost noop because metadata added to transaction in finalize method
diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
index 60230ce2fb0..5eca98aa494 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@@ -133,6 +133,8 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const
 void HDFSObjectStorage::copyObject( /// NOLINT
     const StoredObject & object_from,
     const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     std::optional<ObjectAttributes> object_to_attributes)
 {
     if (object_to_attributes.has_value())
@@ -140,8 +142,8 @@ void HDFSObjectStorage::copyObject( /// NOLINT
             ErrorCodes::UNSUPPORTED_METHOD,
             "HDFS API doesn't support custom attributes/metadata for stored objects");
 
-    auto in = readObject(object_from);
-    auto out = writeObject(object_to, WriteMode::Rewrite);
+    auto in = readObject(object_from, read_settings);
+    auto out = writeObject(object_to, WriteMode::Rewrite, /* attributes= */ {}, /* buf_size= */ DBMS_DEFAULT_BUFFER_SIZE, write_settings);
     copyData(*in, *out);
     out->finalize();
 }
diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
index a691b089b43..8d770c12d8f 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
@@ -98,6 +98,8 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void shutdown() override;
diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp
index ea22294224c..3c77de8f5b7 100644
--- a/src/Disks/ObjectStorages/IObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/IObjectStorage.cpp
@@ -62,14 +62,16 @@ ThreadPool & IObjectStorage::getThreadPoolWriter()
 void IObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
     const StoredObject & object_from,
     const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     IObjectStorage & object_storage_to,
     std::optional<ObjectAttributes> object_to_attributes)
 {
     if (&object_storage_to == this)
-        copyObject(object_from, object_to, object_to_attributes);
+        copyObject(object_from, object_to, read_settings, write_settings, object_to_attributes);
 
-    auto in = readObject(object_from);
-    auto out = object_storage_to.writeObject(object_to, WriteMode::Rewrite);
+    auto in = readObject(object_from, read_settings);
+    auto out = object_storage_to.writeObject(object_to, WriteMode::Rewrite, /* attributes= */ {}, /* buf_size= */ DBMS_DEFAULT_BUFFER_SIZE, write_settings);
     copyData(*in, *out);
     out->finalize();
 }
diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h
index 32f9d1ba764..032795b380f 100644
--- a/src/Disks/ObjectStorages/IObjectStorage.h
+++ b/src/Disks/ObjectStorages/IObjectStorage.h
@@ -131,6 +131,8 @@ public:
     virtual void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) = 0;
 
     /// Copy object to another instance of object storage
@@ -139,6 +141,8 @@ public:
     virtual void copyObjectToAnotherObjectStorage( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         IObjectStorage & object_storage_to,
         std::optional<ObjectAttributes> object_to_attributes = {});
 
diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
index 69ccf309096..cc53df956c6 100644
--- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
@@ -167,10 +167,14 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path
 }
 
 void LocalObjectStorage::copyObject( // NOLINT
-    const StoredObject & object_from, const StoredObject & object_to, std::optional<ObjectAttributes> /* object_to_attributes */)
+    const StoredObject & object_from,
+    const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
+    std::optional<ObjectAttributes> /* object_to_attributes */)
 {
-    auto in = readObject(object_from);
-    auto out = writeObject(object_to, WriteMode::Rewrite);
+    auto in = readObject(object_from, read_settings);
+    auto out = writeObject(object_to, WriteMode::Rewrite, /* attributes= */ {}, /* buf_size= */ DBMS_DEFAULT_BUFFER_SIZE, write_settings);
     copyData(*in, *out);
     out->finalize();
 }
diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h
index 630320ab7f9..aa3a68731e4 100644
--- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h
+++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h
@@ -57,6 +57,8 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void shutdown() override;
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index 0d9670efebe..8f020e0d1ac 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -425,6 +425,8 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons
 void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
     const StoredObject & object_from,
     const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     IObjectStorage & object_storage_to,
     std::optional<ObjectAttributes> object_to_attributes)
 {
@@ -435,24 +437,48 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT
         auto settings_ptr = s3_settings.get();
         auto size = S3::getObjectSize(*clients_->client, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true);
         auto scheduler = threadPoolCallbackRunner<void>(getThreadPoolWriter(), "S3ObjStor_copy");
-        copyS3File(clients_->client, clients_->client_with_long_timeout, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path,
-                   settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true);
+        copyS3File(clients_->client,
+            clients_->client_with_long_timeout,
+            bucket,
+            object_from.remote_path,
+            0,
+            size,
+            dest_s3->bucket,
+            object_to.remote_path,
+            settings_ptr->request_settings,
+            patchSettings(read_settings),
+            object_to_attributes,
+            scheduler,
+            /* for_disk_s3= */ true);
     }
     else
-    {
-        IObjectStorage::copyObjectToAnotherObjectStorage(object_from, object_to, object_storage_to, object_to_attributes);
-    }
+        IObjectStorage::copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, object_storage_to, object_to_attributes);
 }
 
 void S3ObjectStorage::copyObject( // NOLINT
-    const StoredObject & object_from, const StoredObject & object_to, std::optional<ObjectAttributes> object_to_attributes)
+    const StoredObject & object_from,
+    const StoredObject & object_to,
+    const ReadSettings & read_settings,
+    const WriteSettings &,
+    std::optional<ObjectAttributes> object_to_attributes)
 {
     auto clients_ = clients.get();
     auto settings_ptr = s3_settings.get();
     auto size = S3::getObjectSize(*clients_->client, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true);
     auto scheduler = threadPoolCallbackRunner<void>(getThreadPoolWriter(), "S3ObjStor_copy");
-    copyS3File(clients_->client, clients_->client_with_long_timeout, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path,
-               settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true);
+    copyS3File(clients_->client,
+        clients_->client_with_long_timeout,
+        bucket,
+        object_from.remote_path,
+        0,
+        size,
+        bucket,
+        object_to.remote_path,
+        settings_ptr->request_settings,
+        patchSettings(read_settings),
+        object_to_attributes,
+        scheduler,
+        /* for_disk_s3= */ true);
 }
 
 void S3ObjectStorage::setNewSettings(std::unique_ptr<S3ObjectStorageSettings> && s3_settings_)
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
index 527b1479d89..6e516b39c88 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
@@ -135,11 +135,15 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void copyObjectToAnotherObjectStorage( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         IObjectStorage & object_storage_to,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
index 45b183c15f2..ea05012fb61 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@@ -264,7 +264,7 @@ void WebObjectStorage::removeObjectsIfExist(const StoredObjects &)
     throwNotAllowed();
 }
 
-void WebObjectStorage::copyObject(const StoredObject &, const StoredObject &, std::optional<ObjectAttributes>) // NOLINT
+void WebObjectStorage::copyObject(const StoredObject &, const StoredObject &, const ReadSettings &, const WriteSettings &, std::optional<ObjectAttributes>) // NOLINT
 {
     throwNotAllowed();
 }
diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h
index 1a21d94e230..089bdb99e71 100644
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h
@@ -68,6 +68,8 @@ public:
     void copyObject( /// NOLINT
         const StoredObject & object_from,
         const StoredObject & object_to,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::optional<ObjectAttributes> object_to_attributes = {}) override;
 
     void shutdown() override;
diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp
index 002b8dde566..a16a1a41505 100644
--- a/src/IO/S3/copyS3File.cpp
+++ b/src/IO/S3/copyS3File.cpp
@@ -610,6 +610,7 @@ namespace
             const String & dest_bucket_,
             const String & dest_key_,
             const S3Settings::RequestSettings & request_settings_,
+            const ReadSettings & read_settings_,
             const std::optional<std::map<String, String>> & object_metadata_,
             ThreadPoolCallbackRunner<void> schedule_,
             bool for_disk_s3_)
@@ -619,6 +620,7 @@ namespace
             , offset(src_offset_)
             , size(src_size_)
             , supports_multipart_copy(client_ptr_->supportsMultiPartCopy())
+            , read_settings(read_settings_)
         {
         }
 
@@ -639,12 +641,13 @@ namespace
         size_t offset;
         size_t size;
         bool supports_multipart_copy;
+        const ReadSettings read_settings;
 
         CreateReadBuffer getSourceObjectReadBuffer()
         {
             return [&]
             {
-                return std::make_unique<ReadBufferFromS3>(client_ptr, src_bucket, src_key, "", request_settings, Context::getGlobalContextInstance()->getReadSettings());
+                return std::make_unique<ReadBufferFromS3>(client_ptr, src_bucket, src_key, "", request_settings, read_settings);
             };
         }
 
@@ -826,20 +829,21 @@ void copyS3File(
     const String & dest_bucket,
     const String & dest_key,
     const S3Settings::RequestSettings & settings,
+    const ReadSettings & read_settings,
     const std::optional<std::map<String, String>> & object_metadata,
     ThreadPoolCallbackRunner<void> schedule,
     bool for_disk_s3)
 {
     if (settings.allow_native_copy)
     {
-        CopyFileHelper helper{s3_client, s3_client_with_long_timeout, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3};
+        CopyFileHelper helper{s3_client, s3_client_with_long_timeout, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, read_settings, object_metadata, schedule, for_disk_s3};
         helper.performCopy();
     }
     else
     {
         auto create_read_buffer = [&]
         {
-            return std::make_unique<ReadBufferFromS3>(s3_client, src_bucket, src_key, "", settings, Context::getGlobalContextInstance()->getReadSettings());
+            return std::make_unique<ReadBufferFromS3>(s3_client, src_bucket, src_key, "", settings, read_settings);
         };
         copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, s3_client_with_long_timeout, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3);
     }
diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h
index 3477f5a20ab..1bcbfd7735e 100644
--- a/src/IO/S3/copyS3File.h
+++ b/src/IO/S3/copyS3File.h
@@ -31,6 +31,8 @@ using CreateReadBuffer = std::function<std::unique_ptr<SeekableReadBuffer>()>;
 /// CompleteMultipartUpload requests. These requests need longer timeout because S3 servers often
 /// block on them for multiple seconds without sending or receiving data from us (maybe the servers
 /// are copying data internally, or maybe throttling, idk).
+///
+/// read_settings - is used for throttling in case of native copy is not possible
 void copyS3File(
     const std::shared_ptr<const S3::Client> & s3_client,
     const std::shared_ptr<const S3::Client> & s3_client_with_long_timeout,
@@ -41,6 +43,7 @@ void copyS3File(
     const String & dest_bucket,
     const String & dest_key,
     const S3Settings::RequestSettings & settings,
+    const ReadSettings & read_settings,
     const std::optional<std::map<String, String>> & object_metadata = std::nullopt,
     ThreadPoolCallbackRunner<void> schedule_ = {},
     bool for_disk_s3 = false);
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 27d8991bd62..7fc8187aee5 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -416,7 +416,8 @@ void DataPartStorageOnDiskBase::backup(
 MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze(
     const std::string & to,
     const std::string & dir_path,
-    const WriteSettings & settings,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     std::function<void(const DiskPtr &)> save_metadata_callback,
     const ClonePartParams & params) const
 {
@@ -430,7 +431,8 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze(
         disk,
         getRelativePath(),
         fs::path(to) / dir_path,
-        settings,
+        read_settings,
+        write_settings,
         params.make_source_readonly,
         /* max_level= */ {},
         params.copy_instead_of_hardlink,
@@ -466,6 +468,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart(
     const std::string & to,
     const std::string & dir_path,
     const DiskPtr & dst_disk,
+    const ReadSettings & read_settings,
     const WriteSettings & write_settings,
     Poco::Logger * log) const
 {
@@ -482,7 +485,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart(
     try
     {
         dst_disk->createDirectories(to);
-        src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone, write_settings);
+        src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone, read_settings, write_settings);
     }
     catch (...)
     {
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
index 0adf048b56a..1826e84c28d 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
@@ -63,7 +63,8 @@ public:
     MutableDataPartStoragePtr freeze(
         const std::string & to,
         const std::string & dir_path,
-        const WriteSettings & settings,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::function<void(const DiskPtr &)> save_metadata_callback,
         const ClonePartParams & params) const override;
 
@@ -71,6 +72,7 @@ public:
         const std::string & to,
         const std::string & dir_path,
         const DiskPtr & dst_disk,
+        const ReadSettings & read_settings,
         const WriteSettings & write_settings,
         Poco::Logger * log) const override;
 
diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h
index c76b17f3370..072cb29626e 100644
--- a/src/Storages/MergeTree/IDataPartStorage.h
+++ b/src/Storages/MergeTree/IDataPartStorage.h
@@ -250,7 +250,8 @@ public:
     virtual std::shared_ptr<IDataPartStorage> freeze(
         const std::string & to,
         const std::string & dir_path,
-        const WriteSettings & settings,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         std::function<void(const DiskPtr &)> save_metadata_callback,
         const ClonePartParams & params) const = 0;
 
@@ -259,6 +260,7 @@ public:
         const std::string & to,
         const std::string & dir_path,
         const DiskPtr & disk,
+        const ReadSettings & read_settings,
         const WriteSettings & write_settings,
         Poco::Logger * log) const = 0;
 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 54a169fc779..dc387496371 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1793,12 +1793,13 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix
     return getDataPartStorage().freeze(
         storage.relative_data_path,
         *maybe_path_in_detached,
+        Context::getGlobalContextInstance()->getReadSettings(),
         Context::getGlobalContextInstance()->getWriteSettings(),
         /* save_metadata_callback= */ {},
         params);
 }
 
-MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const WriteSettings & write_settings) const
+MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const ReadSettings & read_settings, const WriteSettings & write_settings) const
 {
     assertOnDisk();
 
@@ -1808,7 +1809,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name);
 
     String path_to_clone = fs::path(storage.relative_data_path) / directory_name / "";
-    return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, write_settings, storage.log);
+    return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, storage.log);
 }
 
 UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 02c838458f9..c30accbc1ba 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -377,7 +377,7 @@ public:
                                                    const DiskTransactionPtr & disk_transaction) const;
 
     /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk
-    MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const WriteSettings & write_settings) const;
+    MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name, const ReadSettings & read_settings, const WriteSettings & write_settings) const;
 
     /// Checks that .bin and .mrk files exist.
     ///
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 14c9961f6c3..26d110f5510 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -4968,7 +4968,7 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
             throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on disk '{}'", partition_id, disk->getName());
     }
 
-    MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(disk), local_context->getWriteSettings());
+    MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(disk), local_context->getReadSettings(), local_context->getWriteSettings());
     switch (moves_outcome)
     {
         case MovePartsOutcome::MovesAreCancelled:
@@ -5031,7 +5031,7 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
             throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName());
     }
 
-    MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(volume), local_context->getWriteSettings());
+    MovePartsOutcome moves_outcome = movePartsToSpace(parts, std::static_pointer_cast<Space>(volume), local_context->getReadSettings(), local_context->getWriteSettings());
     switch (moves_outcome)
     {
         case MovePartsOutcome::MovesAreCancelled:
@@ -7488,6 +7488,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
     const MergeTreePartInfo & dst_part_info,
     const StorageMetadataPtr & metadata_snapshot,
     const IDataPartStorage::ClonePartParams & params,
+    const ReadSettings & read_settings,
     const WriteSettings & write_settings)
 {
     /// Check that the storage policy contains the disk where the src_part is located.
@@ -7545,6 +7546,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
     auto dst_part_storage = src_part_storage->freeze(
         relative_data_path,
         tmp_dst_part_name,
+        read_settings,
         write_settings,
         /* save_metadata_callback= */ {},
         params);
@@ -7803,6 +7805,7 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
         auto new_storage = data_part_storage->freeze(
             backup_part_path,
             part->getDataPartStorage().getPartDirectory(),
+            local_context->getReadSettings(),
             local_context->getWriteSettings(),
             callback,
             params);
@@ -8002,8 +8005,9 @@ bool MergeTreeData::scheduleDataMovingJob(BackgroundJobsAssignee & assignee)
     assignee.scheduleMoveTask(std::make_shared<ExecutableLambdaAdapter>(
         [this, moving_tagger] () mutable
         {
+            ReadSettings read_settings = Context::getGlobalContextInstance()->getReadSettings();
             WriteSettings write_settings = Context::getGlobalContextInstance()->getWriteSettings();
-            return moveParts(moving_tagger, write_settings, /* wait_for_move_if_zero_copy= */ false) == MovePartsOutcome::PartsMoved;
+            return moveParts(moving_tagger, read_settings, write_settings, /* wait_for_move_if_zero_copy= */ false) == MovePartsOutcome::PartsMoved;
         }, moves_assignee_trigger, getStorageID()));
     return true;
 }
@@ -8018,7 +8022,7 @@ bool MergeTreeData::areBackgroundMovesNeeded() const
     return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1;
 }
 
-MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const WriteSettings & write_settings)
+MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const ReadSettings & read_settings, const WriteSettings & write_settings)
 {
     if (parts_mover.moves_blocker.isCancelled())
         return MovePartsOutcome::MovesAreCancelled;
@@ -8027,7 +8031,7 @@ MovePartsOutcome MergeTreeData::movePartsToSpace(const DataPartsVector & parts,
     if (moving_tagger->parts_to_move.empty())
         return MovePartsOutcome::NothingToMove;
 
-    return moveParts(moving_tagger, write_settings, /* wait_for_move_if_zero_copy= */ true);
+    return moveParts(moving_tagger, read_settings, write_settings, /* wait_for_move_if_zero_copy= */ true);
 }
 
 MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::selectPartsForMove()
@@ -8082,7 +8086,7 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co
     return std::make_shared<CurrentlyMovingPartsTagger>(std::move(parts_to_move), *this);
 }
 
-MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy)
+MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const ReadSettings & read_settings, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy)
 {
     LOG_INFO(log, "Got {} parts to move.", moving_tagger->parts_to_move.size());
 
@@ -8143,7 +8147,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
                     {
                         if (lock->isLocked())
                         {
-                            cloned_part = parts_mover.clonePart(moving_part, write_settings);
+                            cloned_part = parts_mover.clonePart(moving_part, read_settings, write_settings);
                             parts_mover.swapClonedPart(cloned_part);
                             break;
                         }
@@ -8170,7 +8174,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
             }
             else /// Ordinary move as it should be
             {
-                cloned_part = parts_mover.clonePart(moving_part, write_settings);
+                cloned_part = parts_mover.clonePart(moving_part, read_settings, write_settings);
                 parts_mover.swapClonedPart(cloned_part);
             }
             write_part_log({});
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 6f9779bde00..414fa493085 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -848,6 +848,7 @@ public:
         const MergeTreePartInfo & dst_part_info,
         const StorageMetadataPtr & metadata_snapshot,
         const IDataPartStorage::ClonePartParams & params,
+        const ReadSettings & read_settings,
         const WriteSettings & write_settings);
 
     virtual std::vector<MergeTreeMutationStatus> getMutationsStatus() const = 0;
@@ -1340,7 +1341,7 @@ protected:
     /// MergeTree because they store mutations in different way.
     virtual std::map<int64_t, MutationCommands> getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0;
     /// Moves part to specified space, used in ALTER ... MOVE ... queries
-    MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const WriteSettings & write_settings);
+    MovePartsOutcome movePartsToSpace(const DataPartsVector & parts, SpacePtr space, const ReadSettings & read_settings, const WriteSettings & write_settings);
 
     struct PartBackupEntries
     {
@@ -1494,7 +1495,7 @@ private:
     using CurrentlyMovingPartsTaggerPtr = std::shared_ptr<CurrentlyMovingPartsTagger>;
 
     /// Move selected parts to corresponding disks
-    MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy);
+    MovePartsOutcome moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagger, const ReadSettings & read_settings, const WriteSettings & write_settings, bool wait_for_move_if_zero_copy);
 
     /// Select parts for move and disks for them. Used in background moving processes.
     CurrentlyMovingPartsTaggerPtr selectPartsForMove();
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index 51e4cee19f8..f4dc6c8d042 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -208,7 +208,7 @@ bool MergeTreePartsMover::selectPartsForMove(
         return false;
 }
 
-MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part, const WriteSettings & write_settings) const
+MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part, const ReadSettings & read_settings, const WriteSettings & write_settings) const
 {
     if (moves_blocker.isCancelled())
         throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
@@ -249,12 +249,12 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
         {
             LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name);
             cloned_part_storage = part->getDataPartStorage().clonePart(
-                path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, write_settings, log);
+                path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, read_settings, write_settings, log);
         }
     }
     else
     {
-        cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME, write_settings);
+        cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME, read_settings, write_settings);
     }
 
     MergeTreeDataPartBuilder builder(*data, part->name, cloned_part_storage);
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h
index 5dcc364a4e9..f172dade40e 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.h
+++ b/src/Storages/MergeTree/MergeTreePartsMover.h
@@ -65,7 +65,7 @@ public:
         const std::lock_guard<std::mutex> & moving_parts_lock);
 
     /// Copies part to selected reservation in detached folder. Throws exception if part already exists.
-    TemporaryClonedPart clonePart(const MergeTreeMoveEntry & moving_part, const WriteSettings & write_settings) const;
+    TemporaryClonedPart clonePart(const MergeTreeMoveEntry & moving_part, const ReadSettings & read_settings, const WriteSettings & write_settings) const;
 
     /// Replaces cloned part from detached directory into active data parts set.
     /// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of
diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp
index 808ece8dc82..15ca2b65731 100644
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@@ -1845,7 +1845,14 @@ bool MutateTask::prepare()
             .txn = ctx->txn, .hardlinked_files = &ctx->hardlinked_files,
             .files_to_copy_instead_of_hardlinks = std::move(files_to_copy_instead_of_hardlinks), .keep_metadata_version = true
         };
-        auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, clone_params, ctx->context->getWriteSettings());
+        auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(
+            ctx->source_part,
+            prefix,
+            ctx->future_part->part_info,
+            ctx->metadata_snapshot,
+            clone_params,
+            ctx->context->getReadSettings(),
+            ctx->context->getWriteSettings());
         part->getDataPartStorage().beginTransaction();
 
         ctx->temporary_directory_lock = std::move(lock);
diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp
index 4c645a8628e..c84e13b167f 100644
--- a/src/Storages/MergeTree/localBackup.cpp
+++ b/src/Storages/MergeTree/localBackup.cpp
@@ -21,7 +21,8 @@ void localBackupImpl(
     IDiskTransaction * transaction,
     const String & source_path,
     const String & destination_path,
-    const WriteSettings & settings,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     bool make_source_readonly,
     size_t level,
     std::optional<size_t> max_level,
@@ -56,13 +57,9 @@ void localBackupImpl(
             if (copy_instead_of_hardlinks || files_to_copy_instead_of_hardlinks.contains(it->name()))
             {
                 if (transaction)
-                {
-                    transaction->copyFile(source, destination, settings);
-                }
+                    transaction->copyFile(source, destination, read_settings, write_settings);
                 else
-                {
-                    disk->copyFile(source, *disk, destination, settings);
-                }
+                    disk->copyFile(source, *disk, destination, read_settings, write_settings);
             }
             else
             {
@@ -79,7 +76,8 @@ void localBackupImpl(
                 transaction,
                 source,
                 destination,
-                settings,
+                read_settings,
+                write_settings,
                 make_source_readonly,
                 level + 1,
                 max_level,
@@ -129,7 +127,8 @@ void localBackup(
     const DiskPtr & disk,
     const String & source_path,
     const String & destination_path,
-    const WriteSettings & settings,
+    const ReadSettings & read_settings,
+    const WriteSettings & write_settings,
     bool make_source_readonly,
     std::optional<size_t> max_level,
     bool copy_instead_of_hardlinks,
@@ -160,7 +159,8 @@ void localBackup(
                     disk_transaction.get(),
                     source_path,
                     destination_path,
-                    settings,
+                    read_settings,
+                    write_settings,
                     make_source_readonly,
                     /* level= */ 0,
                     max_level,
@@ -170,7 +170,7 @@ void localBackup(
             else if (copy_instead_of_hardlinks)
             {
                 CleanupOnFail cleanup([disk, destination_path]() { disk->removeRecursive(destination_path); });
-                disk->copyDirectoryContent(source_path, disk, destination_path, settings);
+                disk->copyDirectoryContent(source_path, disk, destination_path, read_settings, write_settings);
                 cleanup.success();
             }
             else
@@ -189,7 +189,8 @@ void localBackup(
                     disk_transaction.get(),
                     source_path,
                     destination_path,
-                    settings,
+                    read_settings,
+                    write_settings,
                     make_source_readonly,
                     /* level= */ 0,
                     max_level,
diff --git a/src/Storages/MergeTree/localBackup.h b/src/Storages/MergeTree/localBackup.h
index d9b7f3e8b0c..3490db9726e 100644
--- a/src/Storages/MergeTree/localBackup.h
+++ b/src/Storages/MergeTree/localBackup.h
@@ -28,7 +28,8 @@ struct WriteSettings;
         const DiskPtr & disk,
         const String & source_path,
         const String & destination_path,
-        const WriteSettings & settings,
+        const ReadSettings & read_settings,
+        const WriteSettings & write_settings,
         bool make_source_readonly = true,
         std::optional<size_t> max_level = {},
         bool copy_instead_of_hardlinks = false,
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 22700712829..694ad9a49f8 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -2043,7 +2043,14 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con
         MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
 
         IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
-        auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, my_metadata_snapshot, clone_params, local_context->getWriteSettings());
+        auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
+            src_part,
+            TMP_PREFIX,
+            dst_part_info,
+            my_metadata_snapshot,
+            clone_params,
+            local_context->getReadSettings(),
+            local_context->getWriteSettings());
         dst_parts.emplace_back(std::move(dst_part));
         dst_parts_locks.emplace_back(std::move(part_lock));
     }
@@ -2142,7 +2149,15 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const
         MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level);
 
         IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()};
-        auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, local_context->getWriteSettings());
+        auto [dst_part, part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(
+            src_part,
+            TMP_PREFIX,
+            dst_part_info,
+            dest_metadata_snapshot,
+            clone_params,
+            local_context->getReadSettings(),
+            local_context->getWriteSettings()
+        );
         dst_parts.emplace_back(std::move(dst_part));
         dst_parts_locks.emplace_back(std::move(part_lock));
     }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 7c7e6dbd42c..276db7639b9 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2473,7 +2473,13 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
                 .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
             };
             auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
-                part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, clone_params, getContext()->getWriteSettings());
+                part_desc->src_table_part,
+                TMP_PREFIX + "clone_",
+                part_desc->new_part_info,
+                metadata_snapshot,
+                clone_params,
+                getContext()->getReadSettings(),
+                getContext()->getWriteSettings());
             part_desc->res_part = std::move(res_part);
             part_desc->temporary_part_lock = std::move(temporary_part_lock);
         }
@@ -4568,7 +4574,14 @@ bool StorageReplicatedMergeTree::fetchPart(
         {
             chassert(!is_zero_copy_part(part_to_clone));
             IDataPartStorage::ClonePartParams clone_params{ .keep_metadata_version = true };
-            auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(part_to_clone, "tmp_clone_", part_info, metadata_snapshot, clone_params, getContext()->getWriteSettings());
+            auto [cloned_part, lock] = cloneAndLoadDataPartOnSameDisk(
+                part_to_clone,
+                "tmp_clone_",
+                part_info,
+                metadata_snapshot,
+                clone_params,
+                getContext()->getReadSettings(),
+                getContext()->getWriteSettings());
             part_directory_lock = std::move(lock);
             return cloned_part;
         };
@@ -7656,7 +7669,14 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
                 .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(),
                 .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
             };
-            auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, metadata_snapshot, clone_params, query_context->getWriteSettings());
+            auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk(
+                src_part,
+                TMP_PREFIX,
+                dst_part_info,
+                metadata_snapshot,
+                clone_params,
+                query_context->getReadSettings(),
+                query_context->getWriteSettings());
             src_parts.emplace_back(src_part);
             dst_parts.emplace_back(dst_part);
             dst_parts_locks.emplace_back(std::move(part_lock));
@@ -7896,7 +7916,14 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
                 .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(),
                 .metadata_version_to_write = dest_metadata_snapshot->getMetadataVersion()
             };
-            auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(src_part, TMP_PREFIX, dst_part_info, dest_metadata_snapshot, clone_params, query_context->getWriteSettings());
+            auto [dst_part, dst_part_lock] = dest_table_storage->cloneAndLoadDataPartOnSameDisk(
+                src_part,
+                TMP_PREFIX,
+                dst_part_info,
+                dest_metadata_snapshot,
+                clone_params,
+                query_context->getReadSettings(),
+                query_context->getWriteSettings());
 
             src_parts.emplace_back(src_part);
             dst_parts.emplace_back(dst_part);
diff --git a/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.reference b/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.reference
new file mode 100644
index 00000000000..939eb45ce1b
--- /dev/null
+++ b/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.reference
@@ -0,0 +1,2 @@
+native_copy	0
+no_native_copy	1
diff --git a/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.sh b/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.sh
new file mode 100755
index 00000000000..4650415c202
--- /dev/null
+++ b/tests/queries/0_stateless/02844_max_backup_bandwidth_s3.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag: no-fasttest - requires S3
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -nm -q "
+    drop table if exists data;
+    create table data (key UInt64 CODEC(NONE)) engine=MergeTree() order by tuple() settings min_bytes_for_wide_part=1e9, disk='s3_disk';
+    -- reading 1e6*8 bytes with 1M bandwith it should take (8-1)/1=7 seconds
+    insert into data select * from numbers(1e6);
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data/backup2') SETTINGS allow_s3_native_copy=1" --max_backup_bandwidth=1M > /dev/null
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT
+        'native_copy',
+        query_duration_ms >= 7e3
+    FROM system.query_log
+    WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart'
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --query_id "$query_id" -q "backup table data to S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data/backup3') SETTINGS allow_s3_native_copy=0" --max_backup_bandwidth=1M > /dev/null
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT
+        'no_native_copy',
+        query_duration_ms >= 7e3
+    FROM system.query_log
+    WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart'
+"

From dd7096bc59c1bae48581e889f08f7ce1e8b121e4 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Wed, 20 Sep 2023 18:02:59 +0200
Subject: [PATCH 180/243] Speed up fast tests a little bit

---
 docker/test/fasttest/run.sh | 24 +-----------------------
 tests/ci/fast_test_check.py |  6 +++---
 2 files changed, 4 insertions(+), 26 deletions(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 296a132d3e3..81b3289d8c4 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -271,34 +271,12 @@ case "$stage" in
     ;&
 "clone_root")
     clone_root
-
-    # Pass control to the script from cloned sources, unless asked otherwise.
-    if ! [ -v FASTTEST_LOCAL_SCRIPT ]
-    then
-        # 'run' stage is deprecated, used for compatibility with old scripts.
-        # Replace with 'clone_submodules' after Nov 1, 2020.
-        # cd and CLICKHOUSE_DIR are also a setup for old scripts, remove as well.
-        # In modern script we undo it by changing back into workspace dir right
-        # away, see below. Remove that as well.
-        cd "$FASTTEST_SOURCE"
-        CLICKHOUSE_DIR=$(pwd)
-        export CLICKHOUSE_DIR
-        stage=run "$FASTTEST_SOURCE/docker/test/fasttest/run.sh"
-        exit $?
-    fi
-    ;&
-"run")
-    # A deprecated stage that is called by old script and equivalent to everything
-    # after cloning root, starting with cloning submodules.
     ;&
 "clone_submodules")
-    # Recover after being called from the old script that changes into source directory.
-    # See the compatibility hacks in `clone_root` stage above. Remove at the same time,
-    # after Nov 1, 2020.
-    cd "$FASTTEST_WORKSPACE"
     clone_submodules 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/submodule_log.txt"
     ;&
 "run_cmake")
+    cd "$FASTTEST_WORKSPACE"
     run_cmake
     ;&
 "build")
diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py
index f1bbf356fbb..43da7d98ef8 100644
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@@ -24,7 +24,7 @@ from commit_status_helper import (
     format_description,
 )
 from docker_pull_helper import get_image_with_version
-from env_helper import S3_BUILDS_BUCKET, TEMP_PATH
+from env_helper import S3_BUILDS_BUCKET, TEMP_PATH, REPO_COPY
 from get_robot_token import get_best_robot_token
 from pr_info import FORCE_TESTS_LABEL, PRInfo
 from report import TestResult, TestResults, read_test_results
@@ -50,6 +50,7 @@ def get_fasttest_cmd(workspace, output_path, repo_path, pr_number, commit_sha, i
         f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} "
         f"-e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 "
         f"-e SCCACHE_BUCKET={S3_BUILDS_BUCKET} -e SCCACHE_S3_KEY_PREFIX=ccache/sccache "
+        "-e stage=clone_submodules "
         f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse "
         f"--volume={output_path}:/test_output {image}"
     )
@@ -128,8 +129,7 @@ def main():
     output_path = temp_path / "fasttest-output"
     output_path.mkdir(parents=True, exist_ok=True)
 
-    repo_path = temp_path / "fasttest-repo"
-    repo_path.mkdir(parents=True, exist_ok=True)
+    repo_path = Path(REPO_COPY)
 
     run_cmd = get_fasttest_cmd(
         workspace,

From 16fc2739c665b985d24e6dff2508f6f2514d47e6 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Wed, 20 Sep 2023 18:10:50 +0200
Subject: [PATCH 181/243] Use xargs to parallel submodules

---
 docker/test/fasttest/run.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 81b3289d8c4..44cb6fb5428 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -152,7 +152,11 @@ function clone_submodules
         )
 
         git submodule sync
-        git submodule update --jobs=16 --depth 1 --single-branch --init "${SUBMODULES_TO_UPDATE[@]}"
+        git submodule init
+        # --jobs does not work as fast as real parallel running
+        printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \
+            xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \
+              git submodule update --depth 1 --single-branch
         git submodule foreach git reset --hard
         git submodule foreach git checkout @ -f
         git submodule foreach git clean -xfd

From 71c7e3c81e6532eecf44c25ae40d7e5a363d9bf3 Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Wed, 20 Sep 2023 13:33:25 -0400
Subject: [PATCH 182/243] Add logging, fix thread name length

---
 src/Storages/StorageS3.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 288f5423c00..ad8af42d47e 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -484,7 +484,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator(
     : callback(callback_)
 {
     ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, max_threads_count);
-    auto pool_scheduler = threadPoolCallbackRunner<String>(pool, "ReadTaskIteratorPrefetch");
+    auto pool_scheduler = threadPoolCallbackRunner<String>(pool, "S3ReadTaskItr");
 
     std::vector<std::future<String>> keys;
     for (size_t i = 0; i < max_threads_count; ++i)
@@ -1070,6 +1070,7 @@ Pipe StorageS3::read(
 
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     num_streams = std::min(num_streams, estimated_keys_count);
+    LOG_INFO(&Poco::Logger::get("StorageS3"), "adjusting num_streams={}", num_streams);
 
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())

From 20105958a87ff1080ccdc9fdffa93ca79d557974 Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Wed, 20 Sep 2023 13:37:06 -0400
Subject: [PATCH 183/243] add reserve

---
 src/Storages/StorageS3.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index ad8af42d47e..436f7a3c84a 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -487,6 +487,7 @@ StorageS3Source::ReadTaskIterator::ReadTaskIterator(
     auto pool_scheduler = threadPoolCallbackRunner<String>(pool, "S3ReadTaskItr");
 
     std::vector<std::future<String>> keys;
+    keys.reserve(max_threads_count);
     for (size_t i = 0; i < max_threads_count; ++i)
         keys.push_back(pool_scheduler([this] { return callback(); }, Priority{}));
 

From 6acdd65c8e31f6b1fbaa9bd9cb31cd0a2d08e05c Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Wed, 20 Sep 2023 19:07:42 +0200
Subject: [PATCH 184/243] Launch fast tests as the current user

---
 docker/test/fasttest/Dockerfile | 6 +++++-
 docker/test/fasttest/run.sh     | 6 ++++++
 tests/ci/fast_test_check.py     | 2 +-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile
index fd7a5640964..a38f59dacac 100644
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@@ -31,7 +31,11 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \
   && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \
   && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \
   && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \
-  && rm -rf /tmp/clickhouse-odbc-tmp
+  && rm -rf /tmp/clickhouse-odbc-tmp \
+  && mkdir -p /var/lib/clickhouse \
+  && chmod 777 /var/lib/clickhouse
+
+# chmod 777 to make the container user independent
 
 ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 44cb6fb5428..5afba0b9ab1 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -28,6 +28,12 @@ FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/bui
 FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}")
 FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}")
 PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH"
+# Work around for non-existent user
+if [ "$HOME" == "/" ]; then
+    HOME="$FASTTEST_WORKSPACE/user-home"
+    mkdir -p "$HOME"
+    export HOME
+fi
 
 # Export these variables, so that all subsequent invocations of the script
 # use them, and not try to guess them anew, which leads to weird effects.
diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py
index 43da7d98ef8..281bf04a171 100644
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@@ -42,7 +42,7 @@ csv.field_size_limit(sys.maxsize)
 
 def get_fasttest_cmd(workspace, output_path, repo_path, pr_number, commit_sha, image):
     return (
-        f"docker run --cap-add=SYS_PTRACE "
+        f"docker run --cap-add=SYS_PTRACE --user={os.geteuid()}:{os.getegid()} "
         "--network=host "  # required to get access to IAM credentials
         f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output "
         f"-e FASTTEST_SOURCE=/ClickHouse --cap-add=SYS_PTRACE "

From ec8006861138f2494f4cad59d6b697c6b85040a8 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Wed, 20 Sep 2023 18:10:00 +0200
Subject: [PATCH 185/243] add a test for no such key

---
 src/Access/Common/AccessType.h                |  1 +
 src/Access/tests/gtest_access_rights_ops.cpp  |  2 +-
 src/Common/ZooKeeper/IKeeper.cpp              | 32 +++++++++++++++++++
 src/Common/ZooKeeper/IKeeper.h                | 12 +++++++
 src/Interpreters/ActionLocksManager.cpp       |  1 +
 src/Interpreters/InterpreterSystemQuery.cpp   | 18 +++++++++++
 src/Interpreters/TransactionLog.cpp           | 14 +-------
 src/Parsers/ASTSystemQuery.cpp                |  4 ++-
 src/Parsers/ASTSystemQuery.h                  |  2 ++
 src/Parsers/ParserSystemQuery.cpp             |  2 ++
 src/Storages/MergeTree/MergeTreeSettings.h    |  2 ++
 .../ReplicatedMergeTreeCleanupThread.cpp      |  6 ++++
 .../ReplicatedMergeTreeCleanupThread.h        |  5 +++
 src/Storages/StorageReplicatedMergeTree.cpp   | 23 +++++++------
 .../integration/test_grant_and_revoke/test.py |  2 +-
 .../01271_show_privileges.reference           |  1 +
 .../02117_show_create_table_system.reference  | 22 -------------
 .../02117_show_create_table_system.sql        |  4 +--
 .../02485_zero_copy_commit_fail.reference     |  2 ++
 .../02485_zero_copy_commit_fail.sql           | 31 ++++++++++++++++++
 20 files changed, 137 insertions(+), 49 deletions(-)
 create mode 100644 tests/queries/0_stateless/02485_zero_copy_commit_fail.reference
 create mode 100644 tests/queries/0_stateless/02485_zero_copy_commit_fail.sql

diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 0004511fc7f..33d708efafc 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -170,6 +170,7 @@ enum class AccessType
     M(SYSTEM_FETCHES, "SYSTEM STOP FETCHES, SYSTEM START FETCHES, STOP FETCHES, START FETCHES", TABLE, SYSTEM) \
     M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \
     M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \
+    M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \
     M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \
     M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
     M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp
index 91d79be918b..b5a15513a89 100644
--- a/src/Access/tests/gtest_access_rights_ops.cpp
+++ b/src/Access/tests/gtest_access_rights_ops.cpp
@@ -51,7 +51,7 @@ TEST(AccessRights, Union)
               "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
               "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
               "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
-              "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
+              "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
               "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
               "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1");
 }
diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp
index 5897d04b8a2..0269e64be37 100644
--- a/src/Common/ZooKeeper/IKeeper.cpp
+++ b/src/Common/ZooKeeper/IKeeper.cpp
@@ -1,5 +1,8 @@
 #include <Common/ProfileEvents.h>
 #include <Common/ZooKeeper/IKeeper.h>
+#include <Common/thread_local_rng.h>
+#include <random>
+
 
 
 namespace DB
@@ -54,6 +57,35 @@ Exception::Exception(const Error code_)
 Exception::Exception(const Exception & exc) = default;
 
 
+
+SimpleFaultInjection::SimpleFaultInjection(Float64 probability_before, Float64 probability_after_, const String & description_)
+{
+    if (likely(probability_before == 0.0) && likely(probability_after_ == 0.0))
+        return;
+
+    std::bernoulli_distribution fault(probability_before);
+    if (fault(thread_local_rng))
+        throw Coordination::Exception(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (before {})", description_);
+
+    probability_after = probability_after_;
+    description = description_;
+    exceptions_level = std::uncaught_exceptions();
+}
+
+SimpleFaultInjection::~SimpleFaultInjection() noexcept(false)
+{
+    if (likely(probability_after == 0.0))
+        return;
+
+    /// Do not throw from dtor during unwinding
+    if (exceptions_level != std::uncaught_exceptions())
+        return;
+
+    std::bernoulli_distribution fault(probability_after);
+    if (fault(thread_local_rng))
+        throw Coordination::Exception(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (after {})", description);
+}
+
 using namespace DB;
 
 
diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h
index ec23b52ceb1..fa5c16d0040 100644
--- a/src/Common/ZooKeeper/IKeeper.h
+++ b/src/Common/ZooKeeper/IKeeper.h
@@ -510,6 +510,18 @@ public:
     const Error code;
 };
 
+class SimpleFaultInjection
+{
+public:
+    SimpleFaultInjection(Float64 probability_before, Float64 probability_after_, const String & description_);
+    ~SimpleFaultInjection() noexcept(false);
+
+private:
+    Float64 probability_after = 0;
+    String description;
+    int exceptions_level = 0;
+};
+
 
 /** Usage scenario:
   * - create an object and issue commands;
diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp
index d7480d45524..fb5ef4b98ae 100644
--- a/src/Interpreters/ActionLocksManager.cpp
+++ b/src/Interpreters/ActionLocksManager.cpp
@@ -17,6 +17,7 @@ namespace ActionLocks
     extern const StorageActionBlockType PartsTTLMerge = 6;
     extern const StorageActionBlockType PartsMove = 7;
     extern const StorageActionBlockType PullReplicationLog = 8;
+    extern const StorageActionBlockType Cleanup = 9;
 }
 
 
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 12db14973bb..1e0fdca7e07 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -101,6 +101,7 @@ namespace ActionLocks
     extern const StorageActionBlockType PartsTTLMerge;
     extern const StorageActionBlockType PartsMove;
     extern const StorageActionBlockType PullReplicationLog;
+    extern const StorageActionBlockType Cleanup;
 }
 
 
@@ -156,6 +157,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type)
         return AccessType::SYSTEM_MOVES;
     else if (action_type == ActionLocks::PullReplicationLog)
         return AccessType::SYSTEM_PULLING_REPLICATION_LOG;
+    else if (action_type == ActionLocks::Cleanup)
+        return AccessType::SYSTEM_CLEANUP;
     else
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type));
 }
@@ -572,6 +575,12 @@ BlockIO InterpreterSystemQuery::execute()
         case Type::START_PULLING_REPLICATION_LOG:
             startStopAction(ActionLocks::PullReplicationLog, true);
             break;
+        case Type::STOP_CLEANUP:
+            startStopAction(ActionLocks::Cleanup, false);
+            break;
+        case Type::START_CLEANUP:
+            startStopAction(ActionLocks::Cleanup, true);
+            break;
         case Type::DROP_REPLICA:
             dropReplica(query);
             break;
@@ -1145,6 +1154,15 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
                 required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable());
             break;
         }
+        case Type::STOP_CLEANUP:
+        case Type::START_CLEANUP:
+        {
+            if (!query.table)
+                required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG);
+            else
+                required_access.emplace_back(AccessType::SYSTEM_PULLING_REPLICATION_LOG, query.getDatabase(), query.getTable());
+            break;
+        }
         case Type::STOP_FETCHES:
         case Type::START_FETCHES:
         {
diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp
index 66b23f09ba0..a86f6110a84 100644
--- a/src/Interpreters/TransactionLog.cpp
+++ b/src/Interpreters/TransactionLog.cpp
@@ -405,22 +405,10 @@ CSN TransactionLog::commitTransaction(const MergeTreeTransactionPtr & txn, bool
         String csn_path_created;
         try
         {
-            if (unlikely(fault_probability_before_commit > 0.0))
-            {
-                std::bernoulli_distribution fault(fault_probability_before_commit);
-                if (fault(thread_local_rng))
-                    throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (before commit)");
-            }
+            Coordination::SimpleFaultInjection fault(fault_probability_before_commit, fault_probability_after_commit, "commit");
 
             /// Commit point
             csn_path_created = current_zookeeper->create(zookeeper_path_log + "/csn-", serializeTID(txn->tid), zkutil::CreateMode::PersistentSequential);
-
-            if (unlikely(fault_probability_after_commit > 0.0))
-            {
-                std::bernoulli_distribution fault(fault_probability_after_commit);
-                if (fault(thread_local_rng))
-                    throw Coordination::Exception::fromMessage(Coordination::Error::ZCONNECTIONLOSS, "Fault injected (after commit)");
-            }
         }
         catch (const Coordination::Exception & e)
         {
diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp
index 9be01719d8c..76b409ba973 100644
--- a/src/Parsers/ASTSystemQuery.cpp
+++ b/src/Parsers/ASTSystemQuery.cpp
@@ -164,7 +164,9 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &,
         || type == Type::STOP_DISTRIBUTED_SENDS
         || type == Type::START_DISTRIBUTED_SENDS
         || type == Type::STOP_PULLING_REPLICATION_LOG
-        || type == Type::START_PULLING_REPLICATION_LOG)
+        || type == Type::START_PULLING_REPLICATION_LOG
+        || type == Type::STOP_CLEANUP
+        || type == Type::START_CLEANUP)
     {
         if (table)
             print_database_table();
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index 5897c63f66a..aafb68af6f3 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -83,6 +83,8 @@ public:
         SYNC_FILESYSTEM_CACHE,
         STOP_PULLING_REPLICATION_LOG,
         START_PULLING_REPLICATION_LOG,
+        STOP_CLEANUP,
+        START_CLEANUP,
         END
     };
 
diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp
index 36122a48804..2d3479934df 100644
--- a/src/Parsers/ParserSystemQuery.cpp
+++ b/src/Parsers/ParserSystemQuery.cpp
@@ -381,6 +381,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected &
         case Type::START_REPLICATION_QUEUES:
         case Type::STOP_PULLING_REPLICATION_LOG:
         case Type::START_PULLING_REPLICATION_LOG:
+        case Type::STOP_CLEANUP:
+        case Type::START_CLEANUP:
             if (!parseQueryWithOnCluster(res, pos, expected))
                 return false;
             parseDatabaseAndTableAsAST(pos, expected, res->database, res->table);
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 52c3f3efc6d..fe1ef703175 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -121,6 +121,8 @@ struct Settings;
     M(UInt64, max_replicated_fetches_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \
     M(UInt64, max_replicated_sends_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \
     M(Milliseconds, wait_for_unique_parts_send_before_shutdown_ms, 0, "Before shutdown table will wait for required amount time for unique parts (exist only on current replica) to be fetched by other replicas (0 means disabled).", 0) \
+    M(Float, fault_probability_before_part_commit, 0, "For testing. Do not change it.", 0) \
+    M(Float, fault_probability_after_part_commit, 0, "For testing. Do not change it.", 0) \
     \
     /** Check delay of replicas settings. */ \
     M(UInt64, min_relative_delay_to_measure, 120, "Calculate relative replica delay only if absolute delay is not less that this value.", 0) \
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index b72c148a4e8..5de3c9f5d40 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -32,6 +32,12 @@ ReplicatedMergeTreeCleanupThread::ReplicatedMergeTreeCleanupThread(StorageReplic
 
 void ReplicatedMergeTreeCleanupThread::run()
 {
+    if (cleanup_blocker.isCancelled())
+    {
+        LOG_TRACE(LogFrequencyLimiter(log, 30), "Cleanup is cancelled, exiting");
+        return;
+    }
+
     SCOPE_EXIT({ is_running.store(false, std::memory_order_relaxed); });
     is_running.store(true, std::memory_order_relaxed);
 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index 57de7944970..ae9aabdb4e7 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -5,6 +5,7 @@
 #include <Common/ZooKeeper/ZooKeeper.h>
 #include <Common/randomSeed.h>
 #include <Common/Stopwatch.h>
+#include <Common/ActionBlocker.h>
 #include <Core/BackgroundSchedulePool.h>
 #include <thread>
 
@@ -34,6 +35,8 @@ public:
 
     void wakeupEarlierIfNeeded();
 
+    ActionLock getCleanupLock() { return cleanup_blocker.cancel(); }
+
 private:
     StorageReplicatedMergeTree & storage;
     String log_name;
@@ -48,6 +51,8 @@ private:
 
     AtomicStopwatch wakeup_check_timer;
 
+    ActionBlocker cleanup_blocker;
+
     void run();
 
     /// Returns a number this is directly proportional to the number of cleaned up blocks
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index e56055a717a..53e6456f7fc 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -197,6 +197,7 @@ namespace ActionLocks
     extern const StorageActionBlockType PartsTTLMerge;
     extern const StorageActionBlockType PartsMove;
     extern const StorageActionBlockType PullReplicationLog;
+    extern const StorageActionBlockType Cleanup;
 }
 
 
@@ -372,14 +373,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
         /// to be manually deleted before retrying the CreateQuery.
         try
         {
-            if (zookeeper_name == default_zookeeper_name)
-            {
-                current_zookeeper = getContext()->getZooKeeper();
-            }
-            else
-            {
-                current_zookeeper = getContext()->getAuxiliaryZooKeeper(zookeeper_name);
-            }
+            setZooKeeper();
         }
         catch (...)
         {
@@ -1635,7 +1629,13 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd
         }
 
         Coordination::Responses responses;
-        Coordination::Error e = zookeeper->tryMulti(ops, responses);
+        Coordination::Error e;
+        {
+
+            Coordination::SimpleFaultInjection fault(getSettings()->fault_probability_before_part_commit,
+                                                     getSettings()->fault_probability_after_part_commit, "part commit");
+            e = zookeeper->tryMulti(ops, responses);
+        }
         if (e == Coordination::Error::ZOK)
         {
             LOG_DEBUG(log, "Part {} committed to zookeeper", part->name);
@@ -8233,6 +8233,9 @@ ActionLock StorageReplicatedMergeTree::getActionLock(StorageActionBlockType acti
     if (action_type == ActionLocks::PullReplicationLog)
         return queue.pull_log_blocker.cancel();
 
+    if (action_type == ActionLocks::Cleanup)
+        return cleanup_thread.getCleanupLock();
+
     return {};
 }
 
@@ -8244,6 +8247,8 @@ void StorageReplicatedMergeTree::onActionLockRemove(StorageActionBlockType actio
         background_operations_assignee.trigger();
     else if (action_type == ActionLocks::PartsMove)
         background_moves_assignee.trigger();
+    else if (action_type == ActionLocks::Cleanup)
+        cleanup_thread.wakeup();
 }
 
 bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_milliseconds, SyncReplicaMode sync_mode)
diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py
index 35f4d81b66a..c8a0ee541e2 100644
--- a/tests/integration/test_grant_and_revoke/test.py
+++ b/tests/integration/test_grant_and_revoke/test.py
@@ -188,7 +188,7 @@ def test_grant_all_on_table():
         instance.query("SHOW GRANTS FOR B")
         == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, "
         "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, "
-        "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, "
+        "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, "
         "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n"
     )
     instance.query("REVOKE ALL ON test.table FROM B", user="A")
diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference
index abdda9aa048..37183f077c7 100644
--- a/tests/queries/0_stateless/01271_show_privileges.reference
+++ b/tests/queries/0_stateless/01271_show_privileges.reference
@@ -121,6 +121,7 @@ SYSTEM TTL MERGES	['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL
 SYSTEM FETCHES	['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES']	TABLE	SYSTEM
 SYSTEM MOVES	['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES']	TABLE	SYSTEM
 SYSTEM PULLING REPLICATION LOG	['SYSTEM STOP PULLING REPLICATION LOG','SYSTEM START PULLING REPLICATION LOG']	TABLE	SYSTEM
+SYSTEM CLEANUP	['SYSTEM STOP CLEANUP','SYSTEM START CLEANUP']	TABLE	SYSTEM
 SYSTEM DISTRIBUTED SENDS	['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS']	TABLE	SYSTEM SENDS
 SYSTEM REPLICATED SENDS	['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS']	TABLE	SYSTEM SENDS
 SYSTEM SENDS	['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS']	\N	SYSTEM
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index 9b633314bd3..46373f49700 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -296,19 +296,6 @@ CREATE TABLE system.functions
 )
 ENGINE = SystemFunctions
 COMMENT 'SYSTEM TABLE is built on the fly.'
-CREATE TABLE system.grants
-(
-    `user_name` Nullable(String),
-    `role_name` Nullable(String),
-    `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169),
-    `database` Nullable(String),
-    `table` Nullable(String),
-    `column` Nullable(String),
-    `is_partial_revoke` UInt8,
-    `grant_option` UInt8
-)
-ENGINE = SystemGrants
-COMMENT 'SYSTEM TABLE is built on the fly.'
 CREATE TABLE system.graphite_retentions
 (
     `config_name` String,
@@ -587,15 +574,6 @@ CREATE TABLE system.parts_columns
 )
 ENGINE = SystemPartsColumns
 COMMENT 'SYSTEM TABLE is built on the fly.'
-CREATE TABLE system.privileges
-(
-    `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169),
-    `aliases` Array(String),
-    `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)),
-    `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM SYNC FILESYSTEM CACHE' = 106, 'SYSTEM DROP SCHEMA CACHE' = 107, 'SYSTEM DROP S3 CLIENT CACHE' = 108, 'SYSTEM DROP CACHE' = 109, 'SYSTEM RELOAD CONFIG' = 110, 'SYSTEM RELOAD USERS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM PULLING REPLICATION LOG' = 122, 'SYSTEM DISTRIBUTED SENDS' = 123, 'SYSTEM REPLICATED SENDS' = 124, 'SYSTEM SENDS' = 125, 'SYSTEM REPLICATION QUEUES' = 126, 'SYSTEM DROP REPLICA' = 127, 'SYSTEM SYNC REPLICA' = 128, 'SYSTEM RESTART REPLICA' = 129, 'SYSTEM RESTORE REPLICA' = 130, 'SYSTEM WAIT LOADING PARTS' = 131, 'SYSTEM SYNC DATABASE REPLICA' = 132, 'SYSTEM SYNC TRANSACTION LOG' = 133, 'SYSTEM SYNC FILE CACHE' = 134, 'SYSTEM FLUSH DISTRIBUTED' = 135, 'SYSTEM FLUSH LOGS' = 136, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 137, 'SYSTEM FLUSH' = 138, 'SYSTEM THREAD FUZZER' = 139, 'SYSTEM UNFREEZE' = 140, 'SYSTEM FAILPOINT' = 141, 'SYSTEM LISTEN' = 142, 'SYSTEM' = 143, 'dictGet' = 144, 'displaySecretsInShowAndSelect' = 145, 'addressToLine' = 146, 'addressToLineWithInlines' = 147, 'addressToSymbol' = 148, 'demangle' = 149, 'INTROSPECTION' = 150, 'FILE' = 151, 'URL' = 152, 'REMOTE' = 153, 'MONGO' = 154, 'REDIS' = 155, 'MEILISEARCH' = 156, 'MYSQL' = 157, 'POSTGRES' = 158, 'SQLITE' = 159, 'ODBC' = 160, 'JDBC' = 161, 'HDFS' = 162, 'S3' = 163, 'HIVE' = 164, 'AZURE' = 165, 'SOURCES' = 166, 'CLUSTER' = 167, 'ALL' = 168, 'NONE' = 169))
-)
-ENGINE = SystemPrivileges
-COMMENT 'SYSTEM TABLE is built on the fly.'
 CREATE TABLE system.processes
 (
     `is_initial_query` UInt8,
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.sql b/tests/queries/0_stateless/02117_show_create_table_system.sql
index 37bf2667069..32465abbed7 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.sql
+++ b/tests/queries/0_stateless/02117_show_create_table_system.sql
@@ -38,7 +38,7 @@ show create table errors format TSVRaw;
 show create table events format TSVRaw;
 show create table formats format TSVRaw;
 show create table functions format TSVRaw;
-show create table grants format TSVRaw;
+-- show create table grants format TSVRaw; -- it's updated too often, it's inconvenient to update the test
 show create table graphite_retentions format TSVRaw;
 show create table licenses format TSVRaw;
 show create table macros format TSVRaw;
@@ -53,7 +53,7 @@ show create table one format TSVRaw;
 show create table part_moves_between_shards format TSVRaw;
 show create table parts format TSVRaw;
 show create table parts_columns format TSVRaw;
-show create table privileges format TSVRaw;
+-- show create table privileges format TSVRaw; -- it's updated too often, it's inconvenient to update the test
 show create table processes format TSVRaw;
 show create table projection_parts format TSVRaw;
 show create table projection_parts_columns format TSVRaw;
diff --git a/tests/queries/0_stateless/02485_zero_copy_commit_fail.reference b/tests/queries/0_stateless/02485_zero_copy_commit_fail.reference
new file mode 100644
index 00000000000..73c1df53be4
--- /dev/null
+++ b/tests/queries/0_stateless/02485_zero_copy_commit_fail.reference
@@ -0,0 +1,2 @@
+1	1	1
+2	2	2
diff --git a/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql b/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
new file mode 100644
index 00000000000..ba0b2708cd8
--- /dev/null
+++ b/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
@@ -0,0 +1,31 @@
+
+create table rmt1 (n int, m int, k int) engine=ReplicatedMergeTree('/test/02485/{database}/rmt', '1') order by n
+    settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1, old_parts_lifetime=60, cleanup_delay_period=60, max_cleanup_delay_period=60, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0, simultaneous_parts_removal_limit=1;
+create table rmt2 (n int, m int, k int) engine=ReplicatedMergeTree('/test/02485/{database}/rmt', '2') order by n
+    settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1, old_parts_lifetime=0, cleanup_delay_period=0, max_cleanup_delay_period=1, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0;
+
+insert into rmt1 values (1, 1, 1);
+insert into rmt1 values (2, 2, 2);
+system sync replica rmt2 lightweight;
+
+system stop merges rmt2;
+system stop cleanup rmt1;
+system stop replicated sends rmt1;
+
+alter table rmt1 modify setting fault_probability_before_part_commit=1;
+alter table rmt1 update k = 0 where 0;
+
+-- give rmt1 a chance to execute MUTATE_PART (and fail)
+select sleep(1) as test_does_not_rely_on_this format Null;
+system stop merges rmt1;
+system start merges rmt2;
+
+system sync replica rmt2;
+
+-- give rmt2 a chance to cleanup the source part (mutation parent)
+select sleep(3) as test_does_not_rely_on_this format Null;
+
+-- it will remove the mutated part that it failed to commit
+drop table rmt1 sync;
+
+select * from rmt2 order by n;

From f8062d77e1a0cb271dc00cee7f9cdd514ce31049 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Wed, 20 Sep 2023 19:45:34 +0200
Subject: [PATCH 186/243] fix rare data loss

---
 src/Storages/StorageReplicatedMergeTree.cpp | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 53e6456f7fc..6f406f610b7 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -9159,7 +9159,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
 
         files_not_to_remove.insert(parent_not_to_remove.begin(), parent_not_to_remove.end());
 
-        LOG_TRACE(logger, "Remove zookeeper lock {} for part {}", zookeeper_part_replica_node, part_name);
+        LOG_TRACE(logger, "Removing zookeeper lock {} for part {} (files to keep: [{}])", zookeeper_part_replica_node, part_name, fmt::join(files_not_to_remove, ", "));
 
         if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK)
         {
@@ -9196,7 +9196,7 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
         }
         else
         {
-            LOG_TRACE(logger, "No more children left for for {}, will try to remove the whole node", zookeeper_part_uniq_node);
+            LOG_TRACE(logger, "No more children left for {}, will try to remove the whole node", zookeeper_part_uniq_node);
         }
 
         auto error_code = zookeeper_ptr->tryRemove(zookeeper_part_uniq_node);
@@ -9252,8 +9252,19 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
         }
         else
         {
-            LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists",
-                zookeeper_part_node, part_name, children.size(), fmt::join(children, ", "));
+            /// It's possible that we have two instances of the same part with different blob names of
+            /// FILE_FOR_REFERENCES_CHECK aka checksums.txt aka part_unique_id,
+            /// and other files in both parts are hardlinks (the same blobs are shared between part instances).
+            /// It's possible after unsuccessful attempts to commit a mutated part to zk.
+            /// It's not a problem if we have found the mutation parent (so we have files_not_to_remove).
+            /// But in rare cases mutations parents could have been already removed (so we don't have the list of hardlinks).
+
+            /// I'm not 100% sure that parent_not_to_remove list cannot be incomplete (when it's not empty)
+            if (part_info.mutation && parent_not_to_remove.empty())
+                part_has_no_more_locks = false;
+
+            LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists (can remove blobs: {})",
+                zookeeper_part_node, part_name, children.size(), fmt::join(children, ", "), part_has_no_more_locks);
         }
     }
 

From 05a1c96258394b591e94601221bc0180305a27d6 Mon Sep 17 00:00:00 2001
From: Jordi Villar <jrdi.villar@gmail.com>
Date: Wed, 20 Sep 2023 23:00:25 +0200
Subject: [PATCH 187/243] Interval operator support plural literals

---
 src/Parsers/parseIntervalKind.cpp             | 33 ++++++++++++-------
 ..._operator_support_plural_literal.reference | 16 +++++++++
 ...terval_operator_support_plural_literal.sql | 16 +++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)
 create mode 100644 tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
 create mode 100644 tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql

diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp
index 77c3178ae2b..fe052287083 100644
--- a/src/Parsers/parseIntervalKind.cpp
+++ b/src/Parsers/parseIntervalKind.cpp
@@ -7,77 +7,88 @@ namespace DB
 {
 bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result)
 {
-    if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected)
+    if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("NANOSECONDS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected)
         || ParserKeyword("NS").ignore(pos, expected))
     {
         result = IntervalKind::Nanosecond;
         return true;
     }
 
-    if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected)
+    if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("MICROSECONDS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected)
         || ParserKeyword("MCS").ignore(pos, expected))
     {
         result = IntervalKind::Microsecond;
         return true;
     }
 
-    if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected)
+    if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("MILLISECONDS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected)
         || ParserKeyword("MS").ignore(pos, expected))
     {
         result = IntervalKind::Millisecond;
         return true;
     }
 
-    if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected)
+    if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SECONDS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected)
         || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected))
     {
         result = IntervalKind::Second;
         return true;
     }
 
-    if (ParserKeyword("MINUTE").ignore(pos, expected) || ParserKeyword("SQL_TSI_MINUTE").ignore(pos, expected)
+    if (ParserKeyword("MINUTE").ignore(pos, expected) || ParserKeyword("MINUTES").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_MINUTE").ignore(pos, expected)
         || ParserKeyword("MI").ignore(pos, expected) || ParserKeyword("N").ignore(pos, expected))
     {
         result = IntervalKind::Minute;
         return true;
     }
 
-    if (ParserKeyword("HOUR").ignore(pos, expected) || ParserKeyword("SQL_TSI_HOUR").ignore(pos, expected)
+    if (ParserKeyword("HOUR").ignore(pos, expected) || ParserKeyword("HOURS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_HOUR").ignore(pos, expected)
         || ParserKeyword("HH").ignore(pos, expected) || ParserKeyword("H").ignore(pos, expected))
     {
         result = IntervalKind::Hour;
         return true;
     }
 
-    if (ParserKeyword("DAY").ignore(pos, expected) || ParserKeyword("SQL_TSI_DAY").ignore(pos, expected)
+    if (ParserKeyword("DAY").ignore(pos, expected) || ParserKeyword("DAYS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_DAY").ignore(pos, expected)
         || ParserKeyword("DD").ignore(pos, expected) || ParserKeyword("D").ignore(pos, expected))
     {
         result = IntervalKind::Day;
         return true;
     }
 
-    if (ParserKeyword("WEEK").ignore(pos, expected) || ParserKeyword("SQL_TSI_WEEK").ignore(pos, expected)
+    if (ParserKeyword("WEEK").ignore(pos, expected) || ParserKeyword("WEEKS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_WEEK").ignore(pos, expected)
         || ParserKeyword("WK").ignore(pos, expected) || ParserKeyword("WW").ignore(pos, expected))
     {
         result = IntervalKind::Week;
         return true;
     }
 
-    if (ParserKeyword("MONTH").ignore(pos, expected) || ParserKeyword("SQL_TSI_MONTH").ignore(pos, expected)
+    if (ParserKeyword("MONTH").ignore(pos, expected) || ParserKeyword("MONTHS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_MONTH").ignore(pos, expected)
         || ParserKeyword("MM").ignore(pos, expected) || ParserKeyword("M").ignore(pos, expected))
     {
         result = IntervalKind::Month;
         return true;
     }
 
-    if (ParserKeyword("QUARTER").ignore(pos, expected) || ParserKeyword("SQL_TSI_QUARTER").ignore(pos, expected)
+    if (ParserKeyword("QUARTER").ignore(pos, expected) || ParserKeyword("QUARTERS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_QUARTER").ignore(pos, expected)
         || ParserKeyword("QQ").ignore(pos, expected) || ParserKeyword("Q").ignore(pos, expected))
     {
         result = IntervalKind::Quarter;
         return true;
     }
 
-    if (ParserKeyword("YEAR").ignore(pos, expected) || ParserKeyword("SQL_TSI_YEAR").ignore(pos, expected)
+    if (ParserKeyword("YEAR").ignore(pos, expected) || ParserKeyword("YEARS").ignore(pos, expected)
+        || ParserKeyword("SQL_TSI_YEAR").ignore(pos, expected)
         || ParserKeyword("YYYY").ignore(pos, expected) || ParserKeyword("YY").ignore(pos, expected))
     {
         result = IntervalKind::Year;
diff --git a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
new file mode 100644
index 00000000000..4f1d0bdcd49
--- /dev/null
+++ b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
@@ -0,0 +1,16 @@
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2
+2009-02-14 01:31:30
+2009-02-14 01:31:30
+2009-02-15 23:31:30
+2009-02-15 23:31:30
diff --git a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql
new file mode 100644
index 00000000000..dd5fc9eca45
--- /dev/null
+++ b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql
@@ -0,0 +1,16 @@
+SELECT INTERVAL 2 year;
+SELECT INTERVAL 2 years;
+SELECT INTERVAL 2 month;
+SELECT INTERVAL 2 months;
+SELECT INTERVAL 2 week;
+SELECT INTERVAL 2 weeks;
+SELECT INTERVAL 2 day;
+SELECT INTERVAL 2 days;
+SELECT INTERVAL 2 hour;
+SELECT INTERVAL 2 hours;
+SELECT INTERVAL 2 minute;
+SELECT INTERVAL 2 minutes;
+SELECT DATE_ADD(hour, 2, toDateTime(1234567890, 'UTC'));
+SELECT DATE_ADD(hours, 2, toDateTime(1234567890, 'UTC'));
+SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL 2 day);
+SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL 2 days);

From 0518931bbd1e76e195fd798fa6d66a3a9f828cc3 Mon Sep 17 00:00:00 2001
From: Jordi Villar <jrdi.villar@gmail.com>
Date: Wed, 20 Sep 2023 23:16:36 +0200
Subject: [PATCH 188/243] Add more tests

---
 ...2884_interval_operator_support_plural_literal.reference | 7 +++++++
 .../02884_interval_operator_support_plural_literal.sql     | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
index 4f1d0bdcd49..9616b4c1415 100644
--- a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
+++ b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.reference
@@ -10,7 +10,14 @@
 2
 2
 2
+2
+2
+2
+2
+2
+2
 2009-02-14 01:31:30
 2009-02-14 01:31:30
 2009-02-15 23:31:30
 2009-02-15 23:31:30
+2009-02-15 23:31:30
diff --git a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql
index dd5fc9eca45..41403cdf72e 100644
--- a/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql
+++ b/tests/queries/0_stateless/02884_interval_operator_support_plural_literal.sql
@@ -1,16 +1,23 @@
 SELECT INTERVAL 2 year;
 SELECT INTERVAL 2 years;
+SELECT INTERVAL '2 years';
 SELECT INTERVAL 2 month;
 SELECT INTERVAL 2 months;
+SELECT INTERVAL '2 months';
 SELECT INTERVAL 2 week;
 SELECT INTERVAL 2 weeks;
+SELECT INTERVAL '2 weeks';
 SELECT INTERVAL 2 day;
 SELECT INTERVAL 2 days;
+SELECT INTERVAL '2 days';
 SELECT INTERVAL 2 hour;
 SELECT INTERVAL 2 hours;
+SELECT INTERVAL '2 hours';
 SELECT INTERVAL 2 minute;
 SELECT INTERVAL 2 minutes;
+SELECT INTERVAL '2 minutes';
 SELECT DATE_ADD(hour, 2, toDateTime(1234567890, 'UTC'));
 SELECT DATE_ADD(hours, 2, toDateTime(1234567890, 'UTC'));
 SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL 2 day);
 SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL 2 days);
+SELECT DATE_ADD(toDateTime(1234567890, 'UTC'), INTERVAL '2 days');

From 835ef602da876697b62eb6232032448f9bc28f4b Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Wed, 20 Sep 2023 23:31:06 +0200
Subject: [PATCH 189/243] Update 02485_zero_copy_commit_fail.sql

---
 tests/queries/0_stateless/02485_zero_copy_commit_fail.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql b/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
index ba0b2708cd8..dd3df6fff97 100644
--- a/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
+++ b/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
@@ -1,3 +1,4 @@
+-- Tags: no-fasttest
 
 create table rmt1 (n int, m int, k int) engine=ReplicatedMergeTree('/test/02485/{database}/rmt', '1') order by n
     settings storage_policy='s3_cache', allow_remote_fs_zero_copy_replication=1, old_parts_lifetime=60, cleanup_delay_period=60, max_cleanup_delay_period=60, cleanup_delay_period_random_add=1, min_bytes_for_wide_part=0, simultaneous_parts_removal_limit=1;

From 1c8133897608ea85373cf538069b1663e4cc63ad Mon Sep 17 00:00:00 2001
From: Justin de Guzman <justin@justindeguzman.net>
Date: Wed, 20 Sep 2023 17:39:49 -0400
Subject: [PATCH 190/243] Convert third party library titles to headings

---
 .../third-party/client-libraries.md           | 131 +++++++++---------
 1 file changed, 66 insertions(+), 65 deletions(-)

diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md
index f7603994163..e229198bdfd 100644
--- a/docs/en/interfaces/third-party/client-libraries.md
+++ b/docs/en/interfaces/third-party/client-libraries.md
@@ -2,6 +2,7 @@
 slug: /en/interfaces/third-party/client-libraries
 sidebar_position: 26
 sidebar_label: Client Libraries
+description: Third-party client libraries
 ---
 
 # Client Libraries from Third-party Developers
@@ -10,68 +11,68 @@ sidebar_label: Client Libraries
 ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality.
 :::
 
-- Python
-    - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)
-    - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
-    - [clickhouse-client](https://github.com/yurial/clickhouse-client)
-    - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
-    - [asynch](https://github.com/long2ice/asynch)
-- PHP
-    - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
-    - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
-    - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client)
-    - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client)
-    - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
-    - [SeasClick C++ client](https://github.com/SeasX/SeasClick)
-    - [one-ck](https://github.com/lizhichao/one-ck)
-    - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
-    - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
-    - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
-- Go
-    - [clickhouse](https://github.com/kshvakov/clickhouse/)
-    - [go-clickhouse](https://github.com/roistat/go-clickhouse)
-    - [chconn](https://github.com/vahid-sohrabloo/chconn)
-    - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
-    - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
-    - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/)
-- Swift
-    - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO)
-    - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor)
-- NodeJs
-    - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
-    - [node-clickhouse](https://github.com/apla/node-clickhouse)
-    - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
-    - [clickhouse-client](https://github.com/depyronick/clickhouse-client)
-    - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
-- Perl
-    - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
-    - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
-    - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse)
-- Ruby
-    - [ClickHouse (Ruby)](https://github.com/shlima/click_house)
-    - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
-- Rust
-    - [clickhouse.rs](https://github.com/loyd/clickhouse.rs)
-    - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs)
-    - [Klickhouse](https://github.com/Protryon/klickhouse)
-- R
-    - [RClickHouse](https://github.com/IMSMWU/RClickHouse)
-- Java
-    - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java)
-    - [clickhouse-client](https://github.com/Ecwid/clickhouse-client)
-- Scala
-    - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
-- Kotlin
-    - [AORM](https://github.com/TanVD/AORM)
-- C#
-    - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient)
-    - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
-    - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
-    - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
-- Elixir
-    - [clickhousex](https://github.com/appodeal/clickhousex/)
-    - [pillar](https://github.com/sofakingworld/pillar)
-- Nim
-    - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)
-- Haskell
-    - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse)
+### Python
+ - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)
+ - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver)
+ - [clickhouse-client](https://github.com/yurial/clickhouse-client)
+ - [aiochclient](https://github.com/maximdanilchenko/aiochclient)
+ - [asynch](https://github.com/long2ice/asynch)
+### PHP
+ - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse)
+ - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client)
+ - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client)
+ - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client)
+ - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
+ - [SeasClick C++ client](https://github.com/SeasX/SeasClick)
+ - [one-ck](https://github.com/lizhichao/one-ck)
+ - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
+ - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
+ - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php)
+### Go
+ - [clickhouse](https://github.com/kshvakov/clickhouse/)
+ - [go-clickhouse](https://github.com/roistat/go-clickhouse)
+ - [chconn](https://github.com/vahid-sohrabloo/chconn)
+ - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse)
+ - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse)
+ - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/)
+### Swift
+ - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO)
+ - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor)
+### NodeJs
+ - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
+ - [node-clickhouse](https://github.com/apla/node-clickhouse)
+ - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
+ - [clickhouse-client](https://github.com/depyronick/clickhouse-client)
+ - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
+### Perl
+ - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
+ - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
+ - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse)
+### Ruby
+ - [ClickHouse (Ruby)](https://github.com/shlima/click_house)
+ - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
+### Rust
+ - [clickhouse.rs](https://github.com/loyd/clickhouse.rs)
+ - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs)
+ - [Klickhouse](https://github.com/Protryon/klickhouse)
+### R
+ - [RClickHouse](https://github.com/IMSMWU/RClickHouse)
+### Java
+ - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java)
+ - [clickhouse-client](https://github.com/Ecwid/clickhouse-client)
+### Scala
+ - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client)
+### Kotlin
+ - [AORM](https://github.com/TanVD/AORM)
+### C#
+ - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient)
+ - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net)
+ - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)
+ - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
+### Elixir
+ - [clickhousex](https://github.com/appodeal/clickhousex/)
+ - [pillar](https://github.com/sofakingworld/pillar)
+### Nim
+ - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)
+### Haskell
+ - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse)

From 9eccb121835aad3b867276bbabe7d7889db15b10 Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Wed, 20 Sep 2023 23:59:10 +0200
Subject: [PATCH 191/243] small refactor of storageFile globs

---
 src/Storages/StorageFile.cpp | 127 +++++++++++++++--------------------
 1 file changed, 54 insertions(+), 73 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index b44b7789135..75145656a70 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -107,69 +107,59 @@ namespace ErrorCodes
 namespace
 {
 
-/// Forward-declare to use in listFilesWithFoldedRegexpMatchingImpl()
+/// Forward-declare to use in expandSelector()
 void listFilesWithRegexpMatchingImpl(
     const std::string & path_for_ls,
     const std::string & for_match,
     size_t & total_bytes_to_read,
-    bool ignore_access_denied_multidirectory_globs,
     std::vector<std::string> & result,
     bool recursive = false);
 
-/*
- * When `{...}` has any `/`s, it must be processed in a different way:
- * Basically, a path with globs is processed by listFilesWithRegexpMatchingImpl. In case it detects multi-dir glob {.../..., .../...},
- * listFilesWithFoldedRegexpMatchingImpl is in charge from now on.
- * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob.
- * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob.
- * StorageHDFS.cpp has the same logic.
-*/
-void listFilesWithFoldedRegexpMatchingImpl(const std::string & path_for_ls,
-                                           const std::string & processed_suffix,
-                                           const std::string & suffix_with_globs,
-                                           re2::RE2 & matcher,
-                                           size_t & total_bytes_to_read,
-                                           const size_t max_depth,
-                                           const size_t next_slash_after_glob_pos,
-                                           bool ignore_access_denied_multidirectory_globs,
-                                           std::vector<std::string> & result)
+void expandSelector(const std::string & path_for_ls,
+                    const std::string & for_match,
+                    size_t & total_bytes_to_read,
+                    std::vector<std::string> & result,
+                    bool recursive)
 {
-    if (!max_depth)
-        return;
+    std::vector<size_t> anchor_positions = {};
+    bool opened = false, closed = false;
 
-    const fs::directory_iterator end;
-    fs::directory_iterator it = ignore_access_denied_multidirectory_globs
-        ? fs::directory_iterator(path_for_ls, fs::directory_options::skip_permission_denied)
-        : fs::directory_iterator(path_for_ls);
-    for (; it != end; ++it)
+    for (std::string::const_iterator it = for_match.begin(); it != for_match.end(); it++)
     {
-        const std::string full_path = it->path().string();
-        const size_t last_slash = full_path.rfind('/');
-        const String dir_or_file_name = full_path.substr(last_slash);
-
-        if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
+        if (*it == '{')
         {
-            if (next_slash_after_glob_pos == std::string::npos)
-            {
-                total_bytes_to_read += it->file_size();
-                result.push_back(it->path().string());
-            }
-            else
-            {
-                listFilesWithRegexpMatchingImpl(fs::path(full_path) / "" ,
-                                                suffix_with_globs.substr(next_slash_after_glob_pos),
-                                                total_bytes_to_read, ignore_access_denied_multidirectory_globs, result);
-            }
+            anchor_positions.push_back(std::distance(for_match.begin(), it));
+            opened = true;
         }
-        else if (it->is_directory())
+        else if (*it == '}')
         {
-            listFilesWithFoldedRegexpMatchingImpl(fs::path(full_path), processed_suffix + dir_or_file_name,
-                                                  suffix_with_globs, matcher, total_bytes_to_read,
-                                                  max_depth - 1, next_slash_after_glob_pos,
-                                                  ignore_access_denied_multidirectory_globs, result);
+            anchor_positions.push_back(std::distance(for_match.begin(), it));
+            closed = true;
+            break;
+        }
+        else if (*it == ',')
+        {
+            if (!opened)
+                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                                "Unexpected ''' found in path '{}' at position {}.", for_match, std::distance(for_match.begin(), it));
+            anchor_positions.push_back(std::distance(for_match.begin(), it));
         }
-
     }
+    if (!opened || !closed)
+        throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                        "Invalid {{}} glob in path {}.", for_match);
+
+    std::string common_prefix = for_match.substr(0, anchor_positions[0]);
+    std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
+    for (size_t i = 1; i < anchor_positions.size(); ++i)
+    {
+        std::ostringstream oss;
+        oss << common_prefix
+            << for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
+            << common_suffix;
+        listFilesWithRegexpMatchingImpl(path_for_ls, oss.str(), total_bytes_to_read, result, recursive);
+    }
+
 }
 
 /* Recursive directory listing with matched paths as a result.
@@ -179,7 +169,6 @@ void listFilesWithRegexpMatchingImpl(
     const std::string & path_for_ls,
     const std::string & for_match,
     size_t & total_bytes_to_read,
-    bool ignore_access_denied_multidirectory_globs,
     std::vector<std::string> & result,
     bool recursive)
 {
@@ -191,30 +180,31 @@ void listFilesWithRegexpMatchingImpl(
 
     /// slashes_in_glob counter is a upper-bound estimate of recursion depth
     /// needed to process complex cases when `/` is included into glob, e.g. /pa{th1/a,th2/b}.csv
-    size_t slashes_in_glob = 0;
+    bool has_curly_braces = false;
     const size_t next_slash_after_glob_pos = [&]()
     {
         if (!has_glob)
             return suffix_with_globs.find('/', 1);
 
-        size_t in_curly = 0;
         for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
         {
             if (*it == '{')
-                ++in_curly;
-            else if (*it == '/')
             {
-                if (in_curly)
-                    ++slashes_in_glob;
-                else
-                    return size_t(std::distance(suffix_with_globs.begin(), it));
+                has_curly_braces = true;
+                return size_t(0);
             }
-            else if (*it == '}')
-                --in_curly;
+            else if (*it == '/')
+                return size_t(std::distance(suffix_with_globs.begin(), it));
         }
         return std::string::npos;
     }();
 
+    if (has_curly_braces)
+    {
+        expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive);
+        return;
+    }
+
     const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
 
     auto regexp = makeRegexpPatternFromGlobs(current_glob);
@@ -235,14 +225,6 @@ void listFilesWithRegexpMatchingImpl(
 
     const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos;
 
-    if (slashes_in_glob)
-    {
-        listFilesWithFoldedRegexpMatchingImpl(fs::path(prefix_without_globs), "", suffix_with_globs, matcher,
-                                              total_bytes_to_read, slashes_in_glob, next_slash_after_glob_pos,
-                                              ignore_access_denied_multidirectory_globs, result);
-        return;
-    }
-
     const fs::directory_iterator end;
     for (fs::directory_iterator it(prefix_without_globs); it != end; ++it)
     {
@@ -265,12 +247,12 @@ void listFilesWithRegexpMatchingImpl(
             {
                 listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "",
                                                 looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob,
-                                                total_bytes_to_read, ignore_access_denied_multidirectory_globs, result, recursive);
+                                                total_bytes_to_read, result, recursive);
             }
             else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher))
                 /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
                 listFilesWithRegexpMatchingImpl(fs::path(full_path) / "", suffix_with_globs.substr(next_slash_after_glob_pos),
-                                                total_bytes_to_read, ignore_access_denied_multidirectory_globs, result);
+                                                total_bytes_to_read, result);
         }
     }
 }
@@ -278,11 +260,10 @@ void listFilesWithRegexpMatchingImpl(
 std::vector<std::string> listFilesWithRegexpMatching(
     const std::string & path_for_ls,
     const std::string & for_match,
-    size_t & total_bytes_to_read,
-    bool ignore_access_denied_multidirectory_globs)
+    size_t & total_bytes_to_read)
 {
     std::vector<std::string> result;
-    listFilesWithRegexpMatchingImpl(path_for_ls, for_match, total_bytes_to_read, ignore_access_denied_multidirectory_globs, result);
+    listFilesWithRegexpMatchingImpl(path_for_ls, for_match, total_bytes_to_read, result);
     return result;
 }
 
@@ -447,7 +428,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user
     else
     {
         /// We list only non-directory files.
-        paths = listFilesWithRegexpMatching("/", path, total_bytes_to_read, context->getSettingsRef().ignore_access_denied_multidirectory_globs);
+        paths = listFilesWithRegexpMatching("/", path, total_bytes_to_read);
         can_be_directory = false;
     }
 

From 4c904a5cb0a23be992e39ec579544c1b01d46bd1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Thu, 21 Sep 2023 00:00:00 +0200
Subject: [PATCH 192/243] fix

---
 src/Common/ZooKeeper/IKeeper.cpp                                | 2 --
 ...it_fail.reference => 02485_zero_copy_commit_error.reference} | 0
 ...ro_copy_commit_fail.sql => 02485_zero_copy_commit_error.sql} | 0
 3 files changed, 2 deletions(-)
 rename tests/queries/0_stateless/{02485_zero_copy_commit_fail.reference => 02485_zero_copy_commit_error.reference} (100%)
 rename tests/queries/0_stateless/{02485_zero_copy_commit_fail.sql => 02485_zero_copy_commit_error.sql} (100%)

diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp
index 0269e64be37..f2e4b321326 100644
--- a/src/Common/ZooKeeper/IKeeper.cpp
+++ b/src/Common/ZooKeeper/IKeeper.cpp
@@ -4,7 +4,6 @@
 #include <random>
 
 
-
 namespace DB
 {
     namespace ErrorCodes
@@ -57,7 +56,6 @@ Exception::Exception(const Error code_)
 Exception::Exception(const Exception & exc) = default;
 
 
-
 SimpleFaultInjection::SimpleFaultInjection(Float64 probability_before, Float64 probability_after_, const String & description_)
 {
     if (likely(probability_before == 0.0) && likely(probability_after_ == 0.0))
diff --git a/tests/queries/0_stateless/02485_zero_copy_commit_fail.reference b/tests/queries/0_stateless/02485_zero_copy_commit_error.reference
similarity index 100%
rename from tests/queries/0_stateless/02485_zero_copy_commit_fail.reference
rename to tests/queries/0_stateless/02485_zero_copy_commit_error.reference
diff --git a/tests/queries/0_stateless/02485_zero_copy_commit_fail.sql b/tests/queries/0_stateless/02485_zero_copy_commit_error.sql
similarity index 100%
rename from tests/queries/0_stateless/02485_zero_copy_commit_fail.sql
rename to tests/queries/0_stateless/02485_zero_copy_commit_error.sql

From 5f67788c03c4c6e077bceeb7f38bd235f1a6933a Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Thu, 21 Sep 2023 00:16:17 +0200
Subject: [PATCH 193/243] update docs and comments

---
 docs/en/operations/settings/settings.md       | 41 ------------------
 docs/en/sql-reference/table-functions/file.md |  3 +-
 docs/en/sql-reference/table-functions/hdfs.md |  4 +-
 docs/ru/operations/settings/settings.md       | 42 -------------------
 docs/ru/sql-reference/table-functions/file.md |  2 +-
 docs/ru/sql-reference/table-functions/hdfs.md |  3 +-
 src/Storages/StorageFile.cpp                  |  1 +
 7 files changed, 6 insertions(+), 90 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 4cdadd27df7..993271c9b15 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4667,44 +4667,3 @@ The default value is `false`.
 ``` xml
 <validate_tcp_client_information>true</validate_tcp_client_information>
 ```
-
-## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
-
-Allows to ignore 'permission denied' errors when using multi-directory `{}` globs for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md) storages.
-This setting is only applicable to multi directory `{}` globs.
-
-Possible values: `0`, `1`.
-
-Default value: `0`.
-
-### Example
-
-Having the following structure in `user_files`:
-```
-my_directory/
-├── data1
-│   ├── f1.csv
-├── data2
-│   ├── f2.csv
-└── test_root
-```
-where `data1`, `data2` directories are accessible, but one has no rights to read `test_root` directories.
-
-For a query like `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` an exception will be thrown:
-`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.  
-It happens because a multi-directory glob requires a recursive search in _all_ available directories under `my_directory`.
-
-If this setting is on, all inaccessible directories will be silently skipped, even if they are explicitly specified inside `{}`.
-
-```sql
-SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
-
-Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
-```
-```sql
-SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
-
-┌─_path───────────────────┬─_file───────┐
-│ <full path to file>     │ <file name> │
-└─────────────────────────┴─────────────┘
-```
diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index 06571fd724b..7e869af82ef 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -141,7 +141,7 @@ Multiple path components can have globs. For being processed file must exist and
 
 - `*` — Substitutes any number of any characters except `/` including empty string.
 - `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored using [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting for file & HDFS.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
 - `{N..M}` — Substitutes any number in range from N to M including both borders.
 - `**` - Fetches all files inside the folder recursively.
 
@@ -210,7 +210,6 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
 - [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
 - [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
 - [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
-- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.
 
 
 **See Also**
diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md
index b50356202f7..678470e9150 100644
--- a/docs/en/sql-reference/table-functions/hdfs.md
+++ b/docs/en/sql-reference/table-functions/hdfs.md
@@ -45,10 +45,10 @@ Multiple path components can have globs. For being processed file should exists
 
 - `*` — Substitutes any number of any characters except `/` including empty string.
 - `?` — Substitutes any single character.
-- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored using [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol.
 - `{N..M}` — Substitutes any number in range from N to M including both borders.
 
-Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
+Constructions with `{}` are similar to the [remote](../../sql-reference/table-functions/remote.md)) table function.
 
 **Example**
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 5b8bff4b262..c58b3ae8107 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -4209,45 +4209,3 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
 │              1.7091 │                 15008753 │
 └─────────────────────┴──────────────────────────┘
 ```
-
-## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
-
-Позволяет игнорировать ошибку 'permission denied', возникающую при использовании шаблона `{}`, содержащего `/` внутри себя.
-Работает для [File](../../sql-reference/table-functions/file.md#globs_in_path) и [HDFS](../../sql-reference/table-functions/hdfs.md).
-Работает _только_ для указанных выше шаблонов `{}`.
-
-Возможные значения: `0`, `1`.
-
-Значение по умолчанию: `0`.
-
-### Пример
-
-Пусть в `user_files` имеется следующая структура:
-```
-my_directory/
-├── data1
-│   ├── f1.csv
-├── data2
-│   ├── f2.csv
-└── test_root
-```
-Пусть также директории `data1`, `data2` могут быть прочитаны, но прав на чтение `test_root` нет.
-
-На запрос `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` будет выброшено исключение:
-`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.  
-Это происходит, так как для обработки такого шаблона необходимо выполнить рекурсивный поиск по _всем_ директориям, находящимся внутри `my_directory`.
-
-Если данная настройка имеет значение 1, то недоступные директории будут тихо пропущены, даже если они явно указаны внутри `{}`.
-
-```sql
-SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
-
-Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
-```
-```sql
-SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
-
-┌─_path───────────────────┬─_file───────┐
-│ <full path to file>     │ <file name> │
-└─────────────────────────┴─────────────┘
-```
diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md
index 16896442d68..f698554dcf9 100644
--- a/docs/ru/sql-reference/table-functions/file.md
+++ b/docs/ru/sql-reference/table-functions/file.md
@@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
 
 -   `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
 -   `?` — заменяет ровно один любой символ.
--   `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
+-   `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`.
 -   `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
 
 Конструкция с `{}` аналогична табличной функции [remote](remote.md).
diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md
index 3f7d0b408c9..b70de5e3a4f 100644
--- a/docs/ru/sql-reference/table-functions/hdfs.md
+++ b/docs/ru/sql-reference/table-functions/hdfs.md
@@ -43,7 +43,7 @@ LIMIT 2
 
 -   `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
 -   `?` — Заменяет ровно один любой символ.
--   `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
+-   `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`.
 -   `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
 
 Конструкция с `{}` аналогична табличной функции [remote](remote.md).
@@ -62,5 +62,4 @@ LIMIT 2
 **Смотрите также**
 
 -   [Виртуальные столбцы](index.md#table_engines-virtual_columns)
--   Параметр [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs)
 
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 75145656a70..419a5f080cc 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -115,6 +115,7 @@ void listFilesWithRegexpMatchingImpl(
     std::vector<std::string> & result,
     bool recursive = false);
 
+/// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
 void expandSelector(const std::string & path_for_ls,
                     const std::string & for_match,
                     size_t & total_bytes_to_read,

From 50c51c2854e5cf77c3ac131d254de39748c04980 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Wed, 20 Sep 2023 22:11:23 +0000
Subject: [PATCH 194/243] Fix new clang-tidy-s

---
 .clang-tidy                                   |  1 +
 base/base/StringRef.h                         |  2 +-
 base/base/defines.h                           |  2 +-
 base/base/iostream_debug_helpers.h            | 26 +++++++++----------
 base/base/scope_guard.h                       | 12 ++++-----
 base/base/strong_typedef.h                    |  4 +--
 src/Client/LineReader.cpp                     |  2 +-
 src/Common/Dwarf.cpp                          | 16 ++++++------
 .../examples/hash_map_string_3.cpp            |  2 +-
 .../self-extracting-executable/compressor.cpp | 10 +++----
 .../decompressor.cpp                          |  8 +++---
 11 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index 85989d311a2..cfb42ebd4c7 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -108,6 +108,7 @@ Checks: '*,
     -openmp-*,
 
     -misc-const-correctness,
+    -misc-include-cleaner, # useful but far too many occurrences
     -misc-no-recursion,
     -misc-non-private-member-variables-in-classes,
     -misc-confusable-identifiers, # useful but slooow
diff --git a/base/base/StringRef.h b/base/base/StringRef.h
index 9a97b2ea5cc..6456706fafe 100644
--- a/base/base/StringRef.h
+++ b/base/base/StringRef.h
@@ -177,7 +177,7 @@ inline bool memequalWide(const char * p1, const char * p2, size_t size)
             return false;
     }
 
-    switch (size / 16)
+    switch (size / 16) // NOLINT(bugprone-switch-missing-default-case)
     {
         case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]];
         case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]];
diff --git a/base/base/defines.h b/base/base/defines.h
index ee29ecf6118..4d3d8796d21 100644
--- a/base/base/defines.h
+++ b/base/base/defines.h
@@ -185,6 +185,6 @@
 
 /// A template function for suppressing warnings about unused variables or function results.
 template <typename... Args>
-constexpr void UNUSED(Args &&... args [[maybe_unused]])
+constexpr void UNUSED(Args &&... args [[maybe_unused]]) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
 }
diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h
index db974c911df..f531a56031b 100644
--- a/base/base/iostream_debug_helpers.h
+++ b/base/base/iostream_debug_helpers.h
@@ -20,14 +20,14 @@ Out & dumpValue(Out &, T &&);
 
 /// Catch-all case.
 template <int priority, typename Out, typename T>
-std::enable_if_t<priority == -1, Out> & dumpImpl(Out & out, T &&)
+std::enable_if_t<priority == -1, Out> & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return out << "{...}";
 }
 
 /// An object, that could be output with operator <<.
 template <int priority, typename Out, typename T>
-std::enable_if_t<priority == 0, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::declval<Out &>() << std::declval<T>())> * = nullptr)
+std::enable_if_t<priority == 0, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::declval<Out &>() << std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return out << x;
 }
@@ -37,7 +37,7 @@ template <int priority, typename Out, typename T>
 std::enable_if_t<priority == 1
     /// Protect from the case when operator * do effectively nothing (function pointer).
     && !std::is_same_v<std::decay_t<T>, std::decay_t<decltype(*std::declval<T>())>>
-    , Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(*std::declval<T>())> * = nullptr)
+    , Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(*std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     if (!x)
         return out << "nullptr";
@@ -46,7 +46,7 @@ std::enable_if_t<priority == 1
 
 /// Container.
 template <int priority, typename Out, typename T>
-std::enable_if_t<priority == 2, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::begin(std::declval<T>()))> * = nullptr)
+std::enable_if_t<priority == 2, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::begin(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     bool first = true;
     out << "{";
@@ -64,7 +64,7 @@ std::enable_if_t<priority == 2, Out> & dumpImpl(Out & out, T && x, std::decay_t<
 
 template <int priority, typename Out, typename T>
 std::enable_if_t<priority == 3 && std::is_enum_v<std::decay_t<T>>, Out> &
-dumpImpl(Out & out, T && x)
+dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return out << magic_enum::enum_name(x);
 }
@@ -73,7 +73,7 @@ dumpImpl(Out & out, T && x)
 
 template <int priority, typename Out, typename T>
 std::enable_if_t<priority == 3 && (std::is_same_v<std::decay_t<T>, std::string> || std::is_same_v<std::decay_t<T>, const char *>), Out> &
-dumpImpl(Out & out, T && x)
+dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return out << std::quoted(x);
 }
@@ -82,7 +82,7 @@ dumpImpl(Out & out, T && x)
 
 template <int priority, typename Out, typename T>
 std::enable_if_t<priority == 3 && std::is_same_v<std::decay_t<T>, unsigned char>, Out> &
-dumpImpl(Out & out, T && x)
+dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return out << int(x);
 }
@@ -90,7 +90,7 @@ dumpImpl(Out & out, T && x)
 
 /// Tuple, pair
 template <size_t N, typename Out, typename T>
-Out & dumpTupleImpl(Out & out, T && x)
+Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     if constexpr (N == 0)
         out << "{";
@@ -108,14 +108,14 @@ Out & dumpTupleImpl(Out & out, T && x)
 }
 
 template <int priority, typename Out, typename T>
-std::enable_if_t<priority == 4, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::get<0>(std::declval<T>()))> * = nullptr)
+std::enable_if_t<priority == 4, Out> & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::get<0>(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return dumpTupleImpl<0>(out, x);
 }
 
 
 template <int priority, typename Out, typename T>
-Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<priority>(std::declval<Out &>(), std::declval<T>()))> *)
+Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<priority>(std::declval<Out &>(), std::declval<T>()))> *) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return dumpImpl<priority>(out, x);
 }
@@ -124,21 +124,21 @@ Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<p
 struct LowPriority { LowPriority(void *) {} };
 
 template <int priority, typename Out, typename T>
-Out & dumpDispatchPriorities(Out & out, T && x, LowPriority)
+Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return dumpDispatchPriorities<priority - 1>(out, x, nullptr);
 }
 
 
 template <typename Out, typename T>
-Out & dumpValue(Out & out, T && x)
+Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     return dumpDispatchPriorities<5>(out, x, nullptr);
 }
 
 
 template <typename Out, typename T>
-Out & dump(Out & out, const char * name, T && x)
+Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     // Dumping string literal, printing name and demangled type is irrelevant.
     if constexpr (std::is_same_v<const char *, std::decay_t<std::remove_reference_t<T>>>)
diff --git a/base/base/scope_guard.h b/base/base/scope_guard.h
index 8524beac7ea..03670792d59 100644
--- a/base/base/scope_guard.h
+++ b/base/base/scope_guard.h
@@ -9,9 +9,9 @@ class [[nodiscard]] BasicScopeGuard
 {
 public:
     constexpr BasicScopeGuard() = default;
-    constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor)
+    constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor, cppcoreguidelines-noexcept-move-operations)
 
-    constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor)
+    constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor, cppcoreguidelines-noexcept-move-operations)
     {
         if (this != &src)
         {
@@ -23,11 +23,11 @@ public:
 
     template <typename G>
     requires std::is_convertible_v<G, F>
-    constexpr BasicScopeGuard(BasicScopeGuard<G> && src) : function{src.release()} {} // NOLINT(google-explicit-constructor)
+    constexpr BasicScopeGuard(BasicScopeGuard<G> && src) : function{src.release()} {} // NOLINT(google-explicit-constructor, cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
 
     template <typename G>
     requires std::is_convertible_v<G, F>
-    constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src)
+    constexpr BasicScopeGuard & operator=(BasicScopeGuard<G> && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations)
     {
         if (this != &src)
         {
@@ -43,7 +43,7 @@ public:
 
     template <typename G>
     requires std::is_convertible_v<G, F>
-    constexpr BasicScopeGuard(G && function_) : function{std::move(function_)} {} // NOLINT(google-explicit-constructor, bugprone-forwarding-reference-overload, bugprone-move-forwarding-reference)
+    constexpr BasicScopeGuard(G && function_) : function{std::move(function_)} {} // NOLINT(google-explicit-constructor, bugprone-forwarding-reference-overload, bugprone-move-forwarding-reference, cppcoreguidelines-missing-std-forward)
 
     ~BasicScopeGuard() { invoke(); }
 
@@ -70,7 +70,7 @@ public:
 
     template <typename G>
     requires std::is_convertible_v<G, F>
-    BasicScopeGuard<F> & join(BasicScopeGuard<G> && other)
+    BasicScopeGuard<F> & join(BasicScopeGuard<G> && other) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved)
     {
         if (other.function)
         {
diff --git a/base/base/strong_typedef.h b/base/base/strong_typedef.h
index b3b8bced688..518d87a47b9 100644
--- a/base/base/strong_typedef.h
+++ b/base/base/strong_typedef.h
@@ -23,10 +23,10 @@ public:
     constexpr StrongTypedef(): t() {}
 
     constexpr StrongTypedef(const Self &) = default;
-    constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default;
+    constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v<T>) = default; // NOLINT(cppcoreguidelines-noexcept-move-operations, hicpp-noexcept-move, performance-noexcept-move-constructor)
 
     Self & operator=(const Self &) = default;
-    Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default;
+    Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v<T>)= default; // NOLINT(cppcoreguidelines-noexcept-move-operations, hicpp-noexcept-move, performance-noexcept-move-constructor)
 
     template <class Enable = typename std::is_copy_assignable<T>::type>
     Self & operator=(const T & rhs) { t = rhs; return *this;}
diff --git a/src/Client/LineReader.cpp b/src/Client/LineReader.cpp
index 77b4185ec3b..2ec90240fd1 100644
--- a/src/Client/LineReader.cpp
+++ b/src/Client/LineReader.cpp
@@ -115,7 +115,7 @@ replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String &
     return replxx::Replxx::completions_t(range.first, range.second);
 }
 
-void LineReader::Suggest::addWords(Words && new_words)
+void LineReader::Suggest::addWords(Words && new_words) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved)
 {
     Words new_words_no_case = new_words;
     if (!new_words.empty())
diff --git a/src/Common/Dwarf.cpp b/src/Common/Dwarf.cpp
index 551ed93773f..0fa2727086a 100644
--- a/src/Common/Dwarf.cpp
+++ b/src/Common/Dwarf.cpp
@@ -800,7 +800,7 @@ Dwarf::CompilationUnit Dwarf::getCompilationUnit(uint64_t offset) const
         die,
         [&](const Attribute & attr)
         {
-            switch (attr.spec.name)
+            switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
             {
                 case DW_AT_addr_base:
                 case DW_AT_GNU_addr_base:
@@ -996,7 +996,7 @@ bool Dwarf::findLocation(
 
     forEachAttribute(cu, die, [&](const Attribute & attr)
     {
-        switch (attr.spec.name)
+        switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
         {
             case DW_AT_stmt_list:
                 // Offset in .debug_line for the line number VM program for this
@@ -1143,7 +1143,7 @@ void Dwarf::findSubProgramDieForAddress(const CompilationUnit & cu,
             std::optional<uint64_t> range_offset;
             forEachAttribute(cu, child_die, [&](const Attribute & attr)
             {
-                switch (attr.spec.name)
+                switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
                 {
                     case DW_AT_ranges:
                         range_offset = std::get<uint64_t>(attr.attr_value);
@@ -1234,7 +1234,7 @@ void Dwarf::findInlinedSubroutineDieForAddress(
         std::optional<uint64_t> range_offset;
         forEachAttribute(cu, child_die, [&](const Attribute & attr)
         {
-            switch (attr.spec.name)
+            switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
             {
                 case DW_AT_ranges:
                     range_offset = std::get<uint64_t>(attr.attr_value);
@@ -1349,7 +1349,7 @@ void Dwarf::findInlinedSubroutineDieForAddress(
             // its DW_AT_call_file and DW_AT_call_line.
             forEachAttribute(srcu, die_to_look_for_name, [&](const Attribute & attr)
             {
-                switch (attr.spec.name)
+                switch (attr.spec.name) // NOLINT(bugprone-switch-missing-default-case)
                 {
                     case DW_AT_linkage_name:
                         name = std::get<std::string_view>(attr.attr_value);
@@ -1910,7 +1910,7 @@ Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) c
                 auto attr = readLineNumberAttribute(is64Bit_, format, file_names, debugStr_, debugLineStr_);
                 if (i == index)
                 {
-                    switch (attr.content_type_code)
+                    switch (attr.content_type_code) // NOLINT(bugprone-switch-missing-default-case)
                     {
                         case DW_LNCT_path:
                             fn.relativeName = std::get<std::string_view>(attr.attr_value);
@@ -2055,7 +2055,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro
     { // standard opcode
         // Only interpret opcodes that are recognized by the version we're parsing;
         // the others are vendor extensions and we should ignore them.
-        switch (opcode)
+        switch (opcode) // NOLINT(bugprone-switch-missing-default-case)
         {
             case DW_LNS_copy:
                 basicBlock_ = false;
@@ -2127,7 +2127,7 @@ Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & pro
     auto extended_opcode = read<uint8_t>(program);
     --length;
 
-    switch (extended_opcode)
+    switch (extended_opcode) // NOLINT(bugprone-switch-missing-default-case)
     {
         case DW_LNE_end_sequence:
             return END;
diff --git a/src/Interpreters/examples/hash_map_string_3.cpp b/src/Interpreters/examples/hash_map_string_3.cpp
index 447be2dc51b..57e36bed545 100644
--- a/src/Interpreters/examples/hash_map_string_3.cpp
+++ b/src/Interpreters/examples/hash_map_string_3.cpp
@@ -126,7 +126,7 @@ struct FastHash64
         pos2 = reinterpret_cast<const unsigned char*>(pos);
         v = 0;
 
-        switch (len & 7)
+        switch (len & 7) // NOLINT(bugprone-switch-missing-default-case)
         {
             case 7: v ^= static_cast<uint64_t>(pos2[6]) << 48; [[fallthrough]];
             case 6: v ^= static_cast<uint64_t>(pos2[5]) << 40; [[fallthrough]];
diff --git a/utils/self-extracting-executable/compressor.cpp b/utils/self-extracting-executable/compressor.cpp
index 35485481662..708ea535128 100644
--- a/utils/self-extracting-executable/compressor.cpp
+++ b/utils/self-extracting-executable/compressor.cpp
@@ -174,7 +174,7 @@ int compress(int in_fd, int out_fd, int level, off_t & pointer, const struct sta
             return 1;
         }
         pointer += current_block_size;
-        printf("...block compression rate: %.2f%%\n", static_cast<float>(current_block_size) / size * 100);
+        printf("...block compression rate: %.2f%%\n", static_cast<float>(current_block_size) / size * 100); // NOLINT(modernize-use-std-print)
         total_size += size;
         compressed_size += current_block_size;
         current_block_size = 0;
@@ -266,7 +266,7 @@ int compressFiles(const char* out_name, const char* exec, char* filenames[], int
         else
             filename = filenames[i];
 
-        printf("Compressing: %s\n", filename);
+        printf("Compressing: %s\n", filename); // NOLINT(modernize-use-std-print)
 
         int input_fd = open(filename, O_RDONLY);
         if (input_fd == -1)
@@ -302,7 +302,7 @@ int compressFiles(const char* out_name, const char* exec, char* filenames[], int
 
         if (info_in.st_size == 0)
         {
-            printf("...empty file, skipped.\n");
+            printf("...empty file, skipped.\n"); // NOLINT(modernize-use-std-print)
             continue;
         }
 
@@ -597,14 +597,14 @@ int main(int argc, char* argv[])
     std::cout << "Compression with level: " << level << std::endl;
     if (0 != compressFiles(out_name, exec, &argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
     {
-        printf("Compression failed.\n");
+        printf("Compression failed.\n"); // NOLING(modernize-use-std-print)
         if (0 != close(output_fd))
             perror("close");
         unlink(argv[start_of_files - 1]);
         return 1;
     }
 
-    printf("Successfully compressed.\n");
+    printf("Successfully compressed.\n"); // NOLINT(modernize-use-std-print)
 
     if (0 != close(output_fd))
         perror("close");
diff --git a/utils/self-extracting-executable/decompressor.cpp b/utils/self-extracting-executable/decompressor.cpp
index 91f4bea5a5b..967e21dac15 100644
--- a/utils/self-extracting-executable/decompressor.cpp
+++ b/utils/self-extracting-executable/decompressor.cpp
@@ -478,7 +478,7 @@ int main(int/* argc*/, char* argv[])
         if (lock_info.st_size == 1)
             execv(self, argv);
 
-        printf("No target executable - decompression only was performed.\n");
+        printf("No target executable - decompression only was performed.\n"); // NOLINT(modernize-use-std-print)
         return 0;
     }
 #endif
@@ -498,7 +498,7 @@ int main(int/* argc*/, char* argv[])
     /// Decompress all files
     if (0 != decompressFiles(input_fd, path, name, have_compressed_analoge, has_exec, decompressed_suffix, &decompressed_umask))
     {
-        printf("Error happened during decompression.\n");
+        printf("Error happened during decompression.\n"); // NOLINT(modernize-use-std-print)
         if (0 != close(input_fd))
             perror("close");
         return 1;
@@ -514,7 +514,7 @@ int main(int/* argc*/, char* argv[])
     }
 
     if (!have_compressed_analoge)
-        printf("No target executable - decompression only was performed.\n");
+        printf("No target executable - decompression only was performed.\n"); // NOLINT(modernize-use-std-print)
     else
     {
         const char * const decompressed_name_fmt = "%s.decompressed.%s";
@@ -563,6 +563,6 @@ int main(int/* argc*/, char* argv[])
         ftruncate(lock, 0);
 #endif
 
-        printf("No target executable - decompression only was performed.\n");
+        printf("No target executable - decompression only was performed.\n"); // NOLINT(modernize-use-std-print)
     }
 }

From 4a2f7976f01024154e61a99c6d9628e06816b211 Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Wed, 20 Sep 2023 19:43:02 -0400
Subject: [PATCH 195/243] Resolve PR issues

---
 src/Storages/StorageS3.cpp | 2 +-
 src/Storages/StorageS3.h   | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 436f7a3c84a..844e7310b16 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1071,7 +1071,6 @@ Pipe StorageS3::read(
 
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
     num_streams = std::min(num_streams, estimated_keys_count);
-    LOG_INFO(&Poco::Logger::get("StorageS3"), "adjusting num_streams={}", num_streams);
 
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
@@ -1079,6 +1078,7 @@ Pipe StorageS3::read(
 
     const size_t max_threads = local_context->getSettingsRef().max_threads;
     const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
+    LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads);
 
     pipes.reserve(num_streams);
     for (size_t i = 0; i < num_streams; ++i)
diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index f0315244088..088f9000ce8 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -62,6 +62,8 @@ public:
 
         /// Estimates how many streams we need to process all files.
         /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys.
+        /// Intended to be called before any next() calls, may underestimate otherwise
+        /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results.
         virtual size_t estimatedKeysCount() = 0;
 
         KeyWithInfo operator ()() { return next(); }

From a05bb020d40a31107bb391fc0f9bdea43458e4e7 Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Thu, 21 Sep 2023 01:50:41 +0200
Subject: [PATCH 196/243] do all the same for HDFS + remove setting

---
 src/Core/Settings.h               |   1 -
 src/Storages/HDFS/StorageHDFS.cpp | 142 ++++++++++++------------------
 src/Storages/StorageFile.cpp      |   1 -
 3 files changed, 54 insertions(+), 90 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 94968735800..dfc3dc63478 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -802,7 +802,6 @@ class IColumn;
     M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
     M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
     M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0)                                                                                                  \
-    M(Bool, ignore_access_denied_multidirectory_globs, false, "Ignore access denied errors when processing multi-directory globs for file & HDFS.", 0)\
 
 // End of COMMON_SETTINGS
     // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 605942331eb..85d5fec3d24 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -76,83 +76,58 @@ namespace ErrorCodes
 }
 namespace
 {
-    /// Forward-declared to use in LSWithFoldedRegexpMatching w/o circular dependency.
+    /// Forward-declare to use in expandSelector()
     std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls,
                                                                 const HDFSFSPtr & fs,
-                                                                const String & for_match,
-                                                                bool ignore_access_denied_multidirectory_globs);
+                                                                const String & for_match);
 
-    /*
-     * When `{...}` has any `/`s, it must be processed in a different way:
-     * Basically, a path with globs is processed by LSWithRegexpMatching. In case it detects multi-dir glob {.../..., .../...},
-     * LSWithFoldedRegexpMatching is in charge from now on.
-     * It works a bit different: it still recursively goes through subdirectories, but does not match every directory to glob.
-     * Instead, it goes many levels down (until the approximate max_depth is reached) and compares this multi-dir path to a glob.
-     * StorageFile.cpp has the same logic.
-    */
-    std::vector<StorageHDFS::PathWithInfo> LSWithFoldedRegexpMatching(const String & path_for_ls,
-        const HDFSFSPtr & fs,
-        const String & processed_suffix,
-        const String & suffix_with_globs,
-        re2::RE2 & matcher,
-        const size_t max_depth,
-        const size_t next_slash_after_glob_pos,
-        bool ignore_access_denied_multidirectory_globs)
+    /// Process {a,b,c...} globs separately: don't match it against regex, but generate a,b,c strings instead.
+    std::vector<StorageHDFS::PathWithInfo> expandSelector(const String & path_for_ls,
+                                                          const HDFSFSPtr & fs,
+                                                          const String & for_match)
     {
-        /// We don't need to go all the way in every directory if max_depth is reached
-        /// as it is upper limit of depth by simply counting `/`s in curly braces
-        if (!max_depth)
-            return {};
+        std::vector<size_t> anchor_positions = {};
+        bool opened = false, closed = false;
 
-        HDFSFileInfo ls;
-        ls.file_info = hdfsListDirectory(fs.get(), path_for_ls.data(), &ls.length);
-        if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
+        for (std::string::const_iterator it = for_match.begin(); it != for_match.end(); it++)
         {
-            // ignore file not found (as in LSWithRegexpMatching)
-            // keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno.
-            // ignore permission denied if ignore_access_denied_multidirectory_globs is true
-            if (!(ignore_access_denied_multidirectory_globs && errno == EACCES))
-                throw Exception(
-                    ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", path_for_ls, String(hdfsGetLastError()));
-        }
-
-        std::vector<StorageHDFS::PathWithInfo> result;
-
-        if (!ls.file_info && ls.length > 0)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
-
-        for (int i = 0; i < ls.length; ++i)
-        {
-            const String full_path = String(ls.file_info[i].mName);
-            const size_t last_slash = full_path.rfind('/');
-            const String dir_or_file_name = full_path.substr(last_slash);
-            const bool is_directory = ls.file_info[i].mKind == 'D';
-
-            if (re2::RE2::FullMatch(processed_suffix + dir_or_file_name, matcher))
+            if (*it == '{')
             {
-                if (next_slash_after_glob_pos == std::string::npos)
-                {
-                    result.emplace_back(StorageHDFS::PathWithInfo{
-                        String(ls.file_info[i].mName),
-                        StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)}});
-                }
-                else
-                {
-                    std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(
-                        fs::path(full_path) / "" , fs, suffix_with_globs.substr(next_slash_after_glob_pos),
-                        ignore_access_denied_multidirectory_globs);
-                    std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
-                }
+                anchor_positions.push_back(std::distance(for_match.begin(), it));
+                opened = true;
             }
-            else if (is_directory)
+            else if (*it == '}')
             {
-                std::vector<StorageHDFS::PathWithInfo> result_part = LSWithFoldedRegexpMatching(
-                    fs::path(full_path), fs, processed_suffix + dir_or_file_name, suffix_with_globs,
-                    matcher, max_depth - 1, next_slash_after_glob_pos, ignore_access_denied_multidirectory_globs);
-                std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
+                anchor_positions.push_back(std::distance(for_match.begin(), it));
+                closed = true;
+                break;
+            }
+            else if (*it == ',')
+            {
+                if (!opened)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                                    "Unexpected ''' found in path '{}' at position {}.", for_match, std::distance(for_match.begin(), it));
+                anchor_positions.push_back(std::distance(for_match.begin(), it));
             }
         }
-        return result;
+        if (!opened || !closed)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                            "Invalid {{}} glob in path {}.", for_match);
+
+        std::vector<StorageHDFS::PathWithInfo> ret = {};
+
+        std::string common_prefix = for_match.substr(0, anchor_positions[0]);
+        std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
+        for (size_t i = 1; i < anchor_positions.size(); ++i)
+        {
+            std::ostringstream oss;
+            oss << common_prefix
+                << for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
+                << common_suffix;
+            std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(path_for_ls, fs, oss.str());
+            ret.insert(ret.end(), result_part.begin(), result_part.end());
+        }
+        return ret;
     }
 
     /* Recursive directory listing with matched paths as a result.
@@ -161,8 +136,7 @@ namespace
     std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(
         const String & path_for_ls,
         const HDFSFSPtr & fs,
-        const String & for_match,
-        bool ignore_access_denied_multidirectory_globs)
+        const String & for_match)
     {
         const size_t first_glob_pos = for_match.find_first_of("*?{");
         const bool has_glob = first_glob_pos != std::string::npos;
@@ -171,30 +145,28 @@ namespace
         const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
         const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
 
-        size_t slashes_in_glob = 0;
+        bool has_curly_braces = false;
         const size_t next_slash_after_glob_pos = [&]()
         {
             if (!has_glob)
                 return suffix_with_globs.find('/', 1);
 
-            size_t in_curly = 0;
             for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
-            {
+        {
                 if (*it == '{')
-                    ++in_curly;
-                else if (*it == '/')
                 {
-                    if (in_curly)
-                        ++slashes_in_glob;
-                    else
-                        return size_t(std::distance(suffix_with_globs.begin(), it));
+                    has_curly_braces = true;
+                    return size_t(0);
                 }
-                else if (*it == '}')
-                    --in_curly;
-            }
+                else if (*it == '/')
+                    return size_t(std::distance(suffix_with_globs.begin(), it));
+        }
             return std::string::npos;
         }();
 
+        if (has_curly_braces)
+            return expandSelector(path_for_ls, fs, for_match);
+
         const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
 
         re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob));
@@ -202,12 +174,6 @@ namespace
             throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP,
                 "Cannot compile regex from glob ({}): {}", for_match, matcher.error());
 
-        if (slashes_in_glob)
-        {
-            return LSWithFoldedRegexpMatching(fs::path(prefix_without_globs), fs, "", suffix_with_globs, matcher,
-                                              slashes_in_glob, next_slash_after_glob_pos, ignore_access_denied_multidirectory_globs);
-        }
-
         HDFSFileInfo ls;
         ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length);
         if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
@@ -239,7 +205,7 @@ namespace
                 if (re2::RE2::FullMatch(file_name, matcher))
                 {
                     std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs,
-                        suffix_with_globs.substr(next_slash_after_glob_pos), ignore_access_denied_multidirectory_globs);
+                        suffix_with_globs.substr(next_slash_after_glob_pos));
                     /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
                     std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
                 }
@@ -267,7 +233,7 @@ namespace
         HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
         HDFSFSPtr fs = createHDFSFS(builder.get());
 
-        auto res = LSWithRegexpMatching("/", fs, path_from_uri, context->getSettingsRef().ignore_access_denied_multidirectory_globs);
+        auto res = LSWithRegexpMatching("/", fs, path_from_uri);
         return res;
     }
 }
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 419a5f080cc..dfb4358ef18 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -160,7 +160,6 @@ void expandSelector(const std::string & path_for_ls,
             << common_suffix;
         listFilesWithRegexpMatchingImpl(path_for_ls, oss.str(), total_bytes_to_read, result, recursive);
     }
-
 }
 
 /* Recursive directory listing with matched paths as a result.

From 2901eab8ccf16955b7040f7ac729f14d9d390e04 Mon Sep 17 00:00:00 2001
From: Duc Canh Le <duccanh.le@ahrefs.com>
Date: Thu, 21 Sep 2023 03:44:51 +0000
Subject: [PATCH 197/243] avoid race condition on last_updated_time

Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
---
 src/Common/isLocalAddress.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index ce0316db2c4..902505404a6 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -91,16 +91,19 @@ struct NetworkInterfaces : public boost::noncopyable
     {
         static constexpr int NET_INTERFACE_VALID_PERIOD_MS = 30000;
         static NetworkInterfaces nf;
-        static auto last_updated_time = std::chrono::steady_clock::now();
+        static std::atomic<std::chrono::steady_clock::time_point> last_updated_time = std::chrono::steady_clock::now();
         static std::shared_mutex nf_mtx;
 
         auto now = std::chrono::steady_clock::now();
+        auto last_updated_time_snapshot = last_updated_time.load();
 
-        if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_updated_time).count() > NET_INTERFACE_VALID_PERIOD_MS)
+        if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_updated_time_snapshot).count() > NET_INTERFACE_VALID_PERIOD_MS)
         {
             std::unique_lock lock(nf_mtx);
+            if (last_updated_time.load() != last_updated_time_snapshot) /// it's possible that last_updated_time after we get the snapshot
+                return nf;
             nf.swap(NetworkInterfaces());
-            last_updated_time = now;
+            last_updated_time.store(now);
             return nf;
         }
         else

From 7a8246ecded2161cc86df2084b8a05a1f9d7171a Mon Sep 17 00:00:00 2001
From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
Date: Thu, 21 Sep 2023 07:07:32 +0200
Subject: [PATCH 198/243] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a6e23e92d36..30ac45ced03 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,7 +31,7 @@
 * Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/outoput using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
 * A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
 * Add new function `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
-* Allow variable number of columns in TSV/CustomSeprarated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
 * Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)).
 * Allow to execute constant non-deterministic functions in mutations on initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)).
 * Add input format `One` that doesn't read any data and always returns single row with column `dummy` with type `UInt8` and value `0` like `system.one`. It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)).

From 13815a96bf2d04750a5509287b875f8fc475686e Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio2368@users.noreply.github.com>
Date: Thu, 21 Sep 2023 09:07:40 +0200
Subject: [PATCH 199/243] Fix test

---
 tests/integration/test_keeper_four_word_command/test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py
index 0de7de8c3be..5419d2334c7 100644
--- a/tests/integration/test_keeper_four_word_command/test.py
+++ b/tests/integration/test_keeper_four_word_command/test.py
@@ -279,7 +279,7 @@ def test_cmd_conf(started_cluster):
         assert result["stale_log_gap"] == "10000"
         assert result["fresh_log_gap"] == "200"
 
-        assert result["max_requests_batch_size"] == "1000"
+        assert result["max_requests_batch_size"] == "100"
         assert result["max_requests_batch_bytes_size"] == "102400"
         assert result["max_flush_batch_size"] == "1000"
         assert result["max_request_queue_size"] == "100000"

From d80de4ff3afde93aa5890ee0596d2680757b893e Mon Sep 17 00:00:00 2001
From: "wangtao.2077" <wangtao.2077@bytedance.com>
Date: Mon, 4 Sep 2023 16:46:11 +0800
Subject: [PATCH 200/243] fix name clash for multi join rewrit

---
 .../JoinToSubqueryTransformVisitor.cpp        | 108 ++++++++++++++----
 ...ter_v2_handle_last_table_columns.reference |  48 ++++++++
 ..._rewriter_v2_handle_last_table_columns.sql |  47 ++++++++
 3 files changed, 183 insertions(+), 20 deletions(-)
 create mode 100644 tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference
 create mode 100644 tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql

diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
index bac82d967f2..bf2d1eb79cd 100644
--- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
+++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp
@@ -37,10 +37,13 @@ namespace
 
 /// @note we use `--` prefix for unique short names and `--.` for subqueries.
 /// It expects that user do not use names starting with `--` and column names starting with dot.
-ASTPtr makeSubqueryTemplate()
+ASTPtr makeSubqueryTemplate(const String & table_alias)
 {
     ParserTablesInSelectQueryElement parser(true);
-    ASTPtr subquery_template = parseQuery(parser, "(select * from _t) as `--.s`", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+    String query_template = "(select * from _t)";
+    if (!table_alias.empty())
+        query_template += " as " + table_alias;
+    ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
     if (!subquery_template)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template");
     return subquery_template;
@@ -232,8 +235,7 @@ struct RewriteTablesVisitorData
 {
     using TypeToVisit = ASTTablesInSelectQuery;
 
-    ASTPtr left;
-    ASTPtr right;
+    ASTs new_tables;
     bool done = false;
 
     /// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables.
@@ -241,7 +243,6 @@ struct RewriteTablesVisitorData
     {
         if (done)
             return;
-        ASTs new_tables{left, right};
         ast->children.swap(new_tables);
         done = true;
     }
@@ -483,7 +484,11 @@ struct TableNeededColumns
     /// t.x as `some`
     static void addAliasedName(const String & table, const String & column, const String & alias, ASTExpressionList & expression_list)
     {
-        auto ident = std::make_shared<ASTIdentifier>(std::vector<String>{table, column});
+        std::vector<String> name_parts;
+        if (!table.empty())
+            name_parts.push_back(table);
+        name_parts.push_back(column);
+        auto ident = std::make_shared<ASTIdentifier>(std::move(name_parts));
         ident->setAlias(alias);
         expression_list.children.emplace_back(std::move(ident));
     }
@@ -494,7 +499,8 @@ class UniqueShortNames
 public:
     /// We know that long names are unique (do not clashes with others).
     /// So we could make unique names base on this knolage by adding some unused prefix.
-    static constexpr const char * pattern = "--";
+    /// Add a heading underscore to make unique names valid for `isValidIdentifierBegin`
+    static constexpr const char * pattern = "_--";
 
     String longToShort(const String & long_name)
     {
@@ -558,8 +564,6 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
     const std::unordered_set<ASTIdentifier *> & public_identifiers,
     UniqueShortNames & unique_names)
 {
-    size_t last_table_pos = tables.size() - 1;
-
     std::vector<TableNeededColumns> needed_columns;
     needed_columns.reserve(tables.size());
     for (const auto & table : tables)
@@ -602,12 +606,9 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
                     const auto & unique_long_name = ident->name();
 
                     /// For tables moved into subselects we need unique short names for clashed names
-                    if (*table_pos != last_table_pos)
-                    {
-                        String unique_short_name = unique_names.longToShort(unique_long_name);
-                        ident->setShortName(unique_short_name);
-                        needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
-                    }
+                    String unique_short_name = unique_names.longToShort(unique_long_name);
+                    ident->setShortName(unique_short_name);
+                    needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
                 }
                 else
                 {
@@ -625,9 +626,47 @@ std::vector<TableNeededColumns> normalizeColumnNamesExtractNeeded(
                 restoreName(*ident, original_long_name, restored_names);
             }
             else if (got_alias)
-                needed_columns[*table_pos].alias_clashes.emplace(ident->shortName());
+            {
+                String short_name = ident->shortName();
+                if (!isValidIdentifierBegin(short_name.at(0)))
+                {
+                    String original_long_name;
+                    if (public_identifiers.contains(ident))
+                        original_long_name = ident->name();
+
+                    const auto & table = tables[*table_pos];
+                    IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column
+                    const auto & unique_long_name = ident->name();
+
+                    String unique_short_name = unique_names.longToShort(unique_long_name);
+                    ident->setShortName(unique_short_name);
+                    needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
+                    restoreName(*ident, original_long_name, restored_names);
+                }
+                else
+                    needed_columns[*table_pos].alias_clashes.emplace(ident->shortName());
+            }
             else
-                needed_columns[*table_pos].no_clashes.emplace(ident->shortName());
+            {
+                String short_name = ident->shortName();
+                if (!isValidIdentifierBegin(short_name.at(0)))
+                {
+                    String original_long_name;
+                    if (public_identifiers.contains(ident))
+                        original_long_name = ident->name();
+
+                    const auto & table = tables[*table_pos];
+                    IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column
+                    const auto & unique_long_name = ident->name();
+
+                    String unique_short_name = unique_names.longToShort(unique_long_name);
+                    ident->setShortName(unique_short_name);
+                    needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name);
+                    restoreName(*ident, original_long_name, restored_names);
+                }
+                else
+                    needed_columns[*table_pos].no_clashes.emplace(ident->shortName());
+            }
         }
     }
 
@@ -785,7 +824,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
 
     ASTPtr left_table = src_tables[0];
 
-    static ASTPtr subquery_template = makeSubqueryTemplate();
+    static ASTPtr subquery_template = makeSubqueryTemplate("`--.s`");
 
     for (size_t i = 1; i < src_tables.size() - 1; ++i)
     {
@@ -798,7 +837,36 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast
         left_table = replaceJoin(left_table, src_tables[i], subquery);
     }
 
-    RewriteVisitor::Data visitor_data{left_table, src_tables.back()};
+    // expand the last table into a subselect, to resolve alias clashes inside it
+    static ASTPtr last_select_template = makeSubqueryTemplate("`--.t`");
+    auto last_select = last_select_template->clone();
+    {
+        auto expression_list = std::make_shared<ASTExpressionList>();
+        needed_columns[src_tables.size() - 1].fillExpressionList(*expression_list);
+
+        SubqueryExpressionsRewriteVisitor::Data expr_rewrite_data{std::move(expression_list)};
+        SubqueryExpressionsRewriteVisitor(expr_rewrite_data).visit(last_select);
+
+        // move ASTTableJoin out of subquery
+        auto * last_table_elem = src_tables.back()->as<ASTTablesInSelectQueryElement>();
+        auto * last_select_elem = last_select->as<ASTTablesInSelectQueryElement>();
+        if (!last_table_elem || !last_select_elem)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Two TablesInSelectQueryElements expected");
+
+        if (!last_table_elem->table_join)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected");
+
+        last_select_elem->table_join = std::move(last_table_elem->table_join);
+        last_select_elem->children.emplace_back(last_select_elem->table_join);
+        last_table_elem->children.erase(
+            std::remove(last_table_elem->children.begin(), last_table_elem->children.end(), last_select_elem->table_join),
+            last_table_elem->children.end());
+
+        RewriteVisitor::Data visitor_data{{src_tables.back()}};
+        RewriteVisitor(visitor_data).visit(last_select);
+    }
+
+    RewriteVisitor::Data visitor_data{{left_table, last_select}};
     RewriteVisitor(visitor_data).visit(select.refTables());
 
     data.done = true;
@@ -815,7 +883,7 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_r
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected");
 
     /// replace '_t' with pair of joined tables
-    RewriteVisitor::Data visitor_data{ast_left, ast_right};
+    RewriteVisitor::Data visitor_data{{ast_left, ast_right}};
     RewriteVisitor(visitor_data).visit(subquery_template);
     return subquery_template;
 }
diff --git a/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference
new file mode 100644
index 00000000000..76294a66748
--- /dev/null
+++ b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference
@@ -0,0 +1,48 @@
+-- { echo }
+
+-- no clash name
+SELECT
+  c + 1,
+  Z.c + 1
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+31	31
+-- alias clash
+SELECT
+  (a + 1) AS c,
+  Z.c + 1
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+11	31
+-- column clash
+SELECT
+  (X.c + 1) AS c,
+  Z.c + 1
+FROM
+  (SELECT 10 c) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+11	31
+SELECT
+   (X.a + 1) AS a,
+   (Y.a + 1) AS Y_a,
+   (Z.a + 1) AS Z_a,
+   (Y.b + 1) AS b,
+   (Z.b + 1) AS Z_b
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 a, 21 as b) Y
+CROSS JOIN
+  (SELECT 30 a, 31 as b, 32 as c) Z;
+11	21	31	22	32
diff --git a/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql
new file mode 100644
index 00000000000..15a1d36c082
--- /dev/null
+++ b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql
@@ -0,0 +1,47 @@
+-- { echo }
+
+-- no clash name
+SELECT
+  c + 1,
+  Z.c + 1
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+
+-- alias clash
+SELECT
+  (a + 1) AS c,
+  Z.c + 1
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+
+-- column clash
+SELECT
+  (X.c + 1) AS c,
+  Z.c + 1
+FROM
+  (SELECT 10 c) X
+CROSS JOIN
+  (SELECT 20 b) Y
+CROSS JOIN
+  (SELECT 30 c) Z;
+
+SELECT
+   (X.a + 1) AS a,
+   (Y.a + 1) AS Y_a,
+   (Z.a + 1) AS Z_a,
+   (Y.b + 1) AS b,
+   (Z.b + 1) AS Z_b
+FROM
+  (SELECT 10 a) X
+CROSS JOIN
+  (SELECT 20 a, 21 as b) Y
+CROSS JOIN
+  (SELECT 30 a, 31 as b, 32 as c) Z;

From 4030ae24fe7d1d0353627987ef5d800a715995a7 Mon Sep 17 00:00:00 2001
From: "wangtao.2077" <wangtao.2077@bytedance.com>
Date: Wed, 20 Sep 2023 14:14:30 +0800
Subject: [PATCH 201/243] fix test

---
 .../0_stateless/02420_final_setting.reference    | 16 ++++++++--------
 .../02420_final_setting_analyzer.reference       | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/queries/0_stateless/02420_final_setting.reference b/tests/queries/0_stateless/02420_final_setting.reference
index 42acc78c57e..63b09df66cf 100644
--- a/tests/queries/0_stateless/02420_final_setting.reference
+++ b/tests/queries/0_stateless/02420_final_setting.reference
@@ -66,7 +66,7 @@ explain syntax select left_table.id,val_left, val_middle, val_right from left_ta
                                                               inner join right_table on middle_table.id = right_table.id
 ORDER BY left_table.id, val_left, val_middle, val_right;
 SELECT
-    `--left_table.id` AS `left_table.id`,
+    `_--left_table.id` AS `left_table.id`,
     val_left,
     val_middle,
     val_right
@@ -74,9 +74,9 @@ FROM
 (
     SELECT
         val_left,
-        id AS `--left_table.id`,
+        id AS `_--left_table.id`,
         val_middle,
-        middle_table.id AS `--middle_table.id`
+        middle_table.id AS `_--middle_table.id`
     FROM left_table
     FINAL
     ALL INNER JOIN
@@ -85,18 +85,18 @@ FROM
             id,
             val_middle
         FROM middle_table
-    ) AS middle_table ON `--left_table.id` = `--middle_table.id`
+    ) AS middle_table ON `_--left_table.id` = `_--middle_table.id`
 ) AS `--.s`
 ALL INNER JOIN
 (
     SELECT
-        id,
-        val_right
+        val_right,
+        id AS `_--right_table.id`
     FROM right_table
     FINAL
-) AS right_table ON `--middle_table.id` = id
+) AS `--.t` ON `_--middle_table.id` = `_--right_table.id`
 ORDER BY
-    `--left_table.id` ASC,
+    `_--left_table.id` ASC,
     val_left ASC,
     val_middle ASC,
     val_right ASC
diff --git a/tests/queries/0_stateless/02420_final_setting_analyzer.reference b/tests/queries/0_stateless/02420_final_setting_analyzer.reference
index 9a03c484765..dd9fed65f13 100644
--- a/tests/queries/0_stateless/02420_final_setting_analyzer.reference
+++ b/tests/queries/0_stateless/02420_final_setting_analyzer.reference
@@ -67,7 +67,7 @@ explain syntax select left_table.id,val_left, val_middle, val_right from left_ta
                                                                              inner join right_table on middle_table.id = right_table.id
                ORDER BY left_table.id, val_left, val_middle, val_right;
 SELECT
-    `--left_table.id` AS `left_table.id`,
+    `_--left_table.id` AS `left_table.id`,
     val_left,
     val_middle,
     val_right
@@ -75,9 +75,9 @@ FROM
 (
     SELECT
         val_left,
-        id AS `--left_table.id`,
+        id AS `_--left_table.id`,
         val_middle,
-        middle_table.id AS `--middle_table.id`
+        middle_table.id AS `_--middle_table.id`
     FROM left_table
     FINAL
     ALL INNER JOIN
@@ -86,18 +86,18 @@ FROM
             id,
             val_middle
         FROM middle_table
-    ) AS middle_table ON `--left_table.id` = `--middle_table.id`
+    ) AS middle_table ON `_--left_table.id` = `_--middle_table.id`
 ) AS `--.s`
 ALL INNER JOIN
 (
     SELECT
-        id,
-        val_right
+        val_right,
+        id AS `_--right_table.id`
     FROM right_table
     FINAL
-) AS right_table ON `--middle_table.id` = id
+) AS `--.t` ON `_--middle_table.id` = `_--right_table.id`
 ORDER BY
-    `--left_table.id` ASC,
+    `_--left_table.id` ASC,
     val_left ASC,
     val_middle ASC,
     val_right ASC

From c2a593baf93b216edbedb1c468a92e1bb4c1c7f7 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Tue, 19 Sep 2023 18:52:46 +0000
Subject: [PATCH 202/243] Split tests

---
 .../mergetree-family/annindexes.md            |   4 +-
 .../MergeTree/MergeTreeIndexAnnoy.cpp         |  10 +-
 .../MergeTree/MergeTreeIndexUSearch.cpp       |   6 +-
 .../0_stateless/02354_annoy_index.reference   | 150 ---------
 .../queries/0_stateless/02354_annoy_index.sql | 295 ------------------
 .../0_stateless/02354_usearch_index.reference | 153 ---------
 .../0_stateless/02354_usearch_index.sql       | 290 -----------------
 .../02354_vector_search_bugs.reference        |  17 +
 .../0_stateless/02354_vector_search_bugs.sql  | 145 +++++++++
 ...ector_search_default_granularity.reference |   7 +
 ...2354_vector_search_default_granularity.sql |  35 +++
 ...r_search_index_creation_negative.reference |   8 +
 ..._vector_search_index_creation_negative.sql |  63 ++++
 .../02354_vector_search_queries.reference     | 196 ++++++++++++
 .../02354_vector_search_queries.sql           | 240 ++++++++++++++
 15 files changed, 725 insertions(+), 894 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02354_annoy_index.reference
 delete mode 100644 tests/queries/0_stateless/02354_annoy_index.sql
 delete mode 100644 tests/queries/0_stateless/02354_usearch_index.reference
 delete mode 100644 tests/queries/0_stateless/02354_usearch_index.sql
 create mode 100644 tests/queries/0_stateless/02354_vector_search_bugs.reference
 create mode 100644 tests/queries/0_stateless/02354_vector_search_bugs.sql
 create mode 100644 tests/queries/0_stateless/02354_vector_search_default_granularity.reference
 create mode 100644 tests/queries/0_stateless/02354_vector_search_default_granularity.sql
 create mode 100644 tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference
 create mode 100644 tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql
 create mode 100644 tests/queries/0_stateless/02354_vector_search_queries.reference
 create mode 100644 tests/queries/0_stateless/02354_vector_search_queries.sql

diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md
index d6ff7f23bb4..1f87c4eb3d4 100644
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@@ -205,8 +205,8 @@ more accurate search results but slower index creation / query times (approximat
 :::note
 Indexes over columns of type `Array` will generally work faster than indexes on `Tuple` columns. All arrays must have same length. To avoid
 errors, you can use a [CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT
-constraint_name_1 CHECK length(vectors) = 256`. Also, unspecified `Array` values in INSERT statements (i.e. default values) are not
-supported.
+constraint_name_1 CHECK length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default
+values) are not supported.
 :::
 
 Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index 3eec8614dcd..d2ad7e7fefd 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -155,11 +155,15 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
     if (const auto & column_array = typeid_cast<const ColumnArray *>(column_cut.get()))
     {
         const auto & column_array_data = column_array->getData();
-        const auto & column_arary_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
+        const auto & column_array_data_float = typeid_cast<const ColumnFloat32 &>(column_array_data);
+        const auto & column_array_data_float_data = column_array_data_float.getData();
 
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
+        if (column_array->empty())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty");
+
         /// The Annoy algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
         /// are INSERTed into an Annoy-indexed column or if no value was specified at all in which case the arrays take on their default
         /// value which is also empty.
@@ -182,9 +186,9 @@ void MergeTreeIndexAggregatorAnnoy<Distance>::update(const Block & block, size_t
             index = std::make_shared<AnnoyIndexWithSerialization<Distance>>(dimension);
 
         /// Add all rows of block
-        index->add_item(index->get_n_items(), column_arary_data_float_data.data());
+        index->add_item(index->get_n_items(), column_array_data_float_data.data());
         for (size_t current_row = 1; current_row < num_rows; ++current_row)
-            index->add_item(index->get_n_items(), &column_arary_data_float_data[column_array_offsets[current_row - 1]]);
+            index->add_item(index->get_n_items(), &column_array_data_float_data[column_array_offsets[current_row - 1]]);
     }
     else if (const auto & column_tuple = typeid_cast<const ColumnTuple *>(column_cut.get()))
     {
diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
index 009c004faea..45595305abd 100644
--- a/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.cpp
@@ -174,11 +174,15 @@ void MergeTreeIndexAggregatorUSearch<Metric>::update(const Block & block, size_t
     if (const auto & column_array = typeid_cast<const ColumnArray *>(column_cut.get()))
     {
         const auto & column_array_data = column_array->getData();
-        const auto & column_array_data_float_data = typeid_cast<const ColumnFloat32 &>(column_array_data).getData();
+        const auto & column_array_data_float = typeid_cast<const ColumnFloat32 &>(column_array_data);
+        const auto & column_array_data_float_data = column_array_data_float.getData();
 
         const auto & column_array_offsets = column_array->getOffsets();
         const size_t num_rows = column_array_offsets.size();
 
+        if (column_array->empty())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Array is unexpectedly empty");
+
         /// The Usearch algorithm naturally assumes that the indexed vectors have dimension >= 1. This condition is violated if empty arrays
         /// are INSERTed into an Usearch-indexed column or if no value was specified at all in which case the arrays take on their default
         /// values which is also empty.
diff --git a/tests/queries/0_stateless/02354_annoy_index.reference b/tests/queries/0_stateless/02354_annoy_index.reference
deleted file mode 100644
index 81f2ff8aa59..00000000000
--- a/tests/queries/0_stateless/02354_annoy_index.reference
+++ /dev/null
@@ -1,150 +0,0 @@
---- Negative tests ---
---- Test default GRANULARITY (should be 100 mio. for annoy)---
-CREATE TABLE default.tab\n(\n    `id` Int32,\n    `vector` Array(Float32),\n    INDEX annoy_index vector TYPE annoy GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192
-CREATE TABLE default.tab\n(\n    `id` Int32,\n    `vector` Array(Float32),\n    INDEX annoy_index vector TYPE annoy GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192
---- Test with Array, GRANULARITY = 1, index_granularity = 5 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 3/3
-      Skip
-        Name: annoy_index
-        Description: annoy GRANULARITY 1
-        Parts: 1/1
-        Granules: 1/3
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
-          Skip
-            Name: annoy_index
-            Description: annoy GRANULARITY 1
-            Parts: 1/1
-            Granules: 3/3
-Reference ARRAYs with non-matching dimension are rejected
-Special case: MaximumDistance is negative
-WHERE type, L2Distance
-Special case: setting annoy_index_search_k_nodes
-Special case: setting max_limit_for_ann_queries
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
---- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 3/3
-      Skip
-        Name: annoy_index
-        Description: annoy GRANULARITY 1
-        Parts: 1/1
-        Granules: 1/3
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
-          Skip
-            Name: annoy_index
-            Description: annoy GRANULARITY 1
-            Parts: 1/1
-            Granules: 3/3
---- Test non-default metric (cosine distance) + non-default NumTrees (200) ---
---- Test with Array, GRANULARITY = 2, index_granularity = 4 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 4/4
-      Skip
-        Name: annoy_index
-        Description: annoy GRANULARITY 2
-        Parts: 0/1
-        Granules: 0/4
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 4/4
-          Skip
-            Name: annoy_index
-            Description: annoy GRANULARITY 2
-            Parts: 1/1
-            Granules: 2/4
---- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 4/4
-      Skip
-        Name: annoy_index
-        Description: annoy GRANULARITY 4
-        Parts: 0/1
-        Granules: 0/4
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 4/4
-          Skip
-            Name: annoy_index
-            Description: annoy GRANULARITY 4
-            Parts: 1/1
-            Granules: 1/4
---- Test correctness of Annoy index with > 1 mark
-1	[1,0,0,0]
-9000	[9000,0,0,0]
-1	(1,0,0,0)
-9000	(9000,0,0,0)
---- Bugs ---
diff --git a/tests/queries/0_stateless/02354_annoy_index.sql b/tests/queries/0_stateless/02354_annoy_index.sql
deleted file mode 100644
index 67ef64cc301..00000000000
--- a/tests/queries/0_stateless/02354_annoy_index.sql
+++ /dev/null
@@ -1,295 +0,0 @@
--- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan
-
-SET allow_experimental_annoy_index = 1;
-SET allow_experimental_analyzer = 0;
-
-SELECT '--- Negative tests ---';
-
-DROP TABLE IF EXISTS tab;
-
--- must have at most 2 arguments
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-
--- first argument (distance_function) must be String
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-
--- 2nd argument (number of trees) must be UInt64
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-
--- must be created on single column
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index (vector, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
-
--- reject unsupported distance functions
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy('wormholeDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
-
--- must be created on Array/Tuple(Float32) columns
-SET allow_suspicious_low_cardinality_types = 1;
-CREATE TABLE tab(id Int32, vector Float32, INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Array(Float64), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Tuple(Float64), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector LowCardinality(Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Nullable(Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-
-SELECT '--- Test default GRANULARITY (should be 100 mio. for annoy)---';
-
-CREATE TABLE tab (id Int32, vector Array(Float32), INDEX annoy_index(vector) TYPE annoy) ENGINE=MergeTree ORDER BY id;
-SHOW CREATE TABLE tab;
-DROP TABLE tab;
-
-CREATE TABLE tab (id Int32, vector Array(Float32)) ENGINE=MergeTree ORDER BY id;
-ALTER TABLE tab ADD INDEX annoy_index(vector) TYPE annoy;
-SHOW CREATE TABLE tab;
-
-DROP TABLE tab;
-
-SELECT '--- Test with Array, GRANULARITY = 1, index_granularity = 5 ---';
-
-DROP TABLE IF EXISTS tab;
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
-
--- rows = 15, index_granularity = 5, GRANULARITY = 1 gives 3 annoy-indexed blocks (each comprising a single granule)
--- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one annoy-indexed block produces results --> "Granules: 1/3"
-
--- See (*) why commented out
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
-LIMIT 3;
-
--- See (*) why commented out
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
-LIMIT 3;
-
--- Test special cases. Corresponding special case tests are omitted from later tests.
-
-SELECT 'Reference ARRAYs with non-matching dimension are rejected';
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [0.0, 0.0])
-LIMIT 3; -- { serverError INCORRECT_QUERY }
-
-SELECT 'Special case: MaximumDistance is negative';
-SELECT 'WHERE type, L2Distance';
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < -1.0
-LIMIT 3; -- { serverError INCORRECT_QUERY }
-
-SELECT 'Special case: setting annoy_index_search_k_nodes';
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [5.3, 7.3, 2.1])
-LIMIT 3
-SETTINGS annoy_index_search_k_nodes=0; -- searches zero nodes --> no results
-
-SELECT 'Special case: setting max_limit_for_ann_queries';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [5.3, 7.3, 2.1])
-LIMIT 3
-SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index
-
-DROP TABLE tab;
-
--- Test Tuple embeddings. Triggers different logic than Array inside MergeTreeIndexAnnoy but the same logic as Array above MergeTreeIndexAnnoy.
--- Therefore test Tuple case just once.
-
-SELECT '--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---';
-
-CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32), INDEX annoy_index vector TYPE annoy() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, (0.0, 0.0, 10.0)), (2, (0.0, 0.0, 10.5)), (3, (0.0, 0.0, 9.5)), (4, (0.0, 0.0, 9.7)), (5, (0.0, 0.0, 10.2)), (6, (10.0, 0.0, 0.0)), (7, (9.5, 0.0, 0.0)), (8, (9.7, 0.0, 0.0)), (9, (10.2, 0.0, 0.0)), (10, (10.5, 0.0, 0.0)), (11, (0.0, 10.0, 0.0)), (12, (0.0, 9.5, 0.0)), (13, (0.0, 9.7, 0.0)), (14, (0.0, 10.2, 0.0)), (15, (0.0, 10.5, 0.0));
-
--- See (*) why commented out
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0
-LIMIT 3;
-
--- See (*) why commented out
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, (0.0, 0.0, 10.0))
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (0.0, 0.0, 10.0))
-LIMIT 3;
-
-DROP TABLE tab;
-
--- Not a systematic test, just to make sure no bad things happen
-SELECT '--- Test non-default metric (cosine distance) + non-default NumTrees (200) ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy('cosineDistance', 200) GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
-
--- See (*) why commented out
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
--- LIMIT 3;
-
--- See (*) why commented out
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
--- LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Test with Array, GRANULARITY = 2, index_granularity = 4 ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]);
-
--- rows = 16, index_granularity = 4, GRANULARITY = 2 gives 2 annoy-indexed blocks (each comprising two granules)
--- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one annoy-indexed block produces results --> "Granules: 2/4"
-
--- See (*) why commented out
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
-LIMIT 3;
-
--- See (*) why commented out
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
-LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy() GRANULARITY 4) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]);
-
--- rows = 16, index_granularity = 4, GRANULARITY = 4 gives a single annoy-indexed block (comprising all granules)
--- no two matches happen to be located in the same granule, so with LIMIT = 3, we'll get "Granules: 2/4"
-
--- See (*) why commented out
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
-LIMIT 3;
-
--- See (*) why commented out
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
-LIMIT 3;
-
-DROP TABLE tab;
-
--- (*) Storage and search in Annoy indexes is inherently random. Tests which check for exact row matches would be unstable. Therefore,
--- comment them out.
-
-SELECT '--- Test correctness of Annoy index with > 1 mark';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug
-INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] from numbers(10000);
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0])
-LIMIT 1;
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0])
-LIMIT 1;
-
-DROP TABLE tab;
-
-CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX annoy_index vector TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug
-INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) from numbers(10000);
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0))
-LIMIT 1;
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0))
-LIMIT 1;
-
-DROP TABLE tab;
-
-SELECT '--- Bugs ---';
-
--- Arrays with default values are rejected, issue #52258
-CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid);
-INSERT INTO tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA }
-DROP TABLE tab;
-
--- Tuples with default value work
-CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE annoy()) ENGINE = MergeTree() ORDER BY (uuid);
-INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine
-DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_usearch_index.reference b/tests/queries/0_stateless/02354_usearch_index.reference
deleted file mode 100644
index c2791e99a54..00000000000
--- a/tests/queries/0_stateless/02354_usearch_index.reference
+++ /dev/null
@@ -1,153 +0,0 @@
---- Negative tests ---
---- Test default GRANULARITY (should be 100 mio. for usearch)---
-CREATE TABLE default.tab\n(\n    `id` Int32,\n    `vector` Array(Float32),\n    INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192
-CREATE TABLE default.tab\n(\n    `id` Int32,\n    `vector` Array(Float32),\n    INDEX usearch_index vector TYPE usearch GRANULARITY 100000000\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192
---- Test with Array, GRANULARITY = 1, index_granularity = 5 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 3/3
-      Skip
-        Name: usearch_index
-        Description: usearch GRANULARITY 1
-        Parts: 1/1
-        Granules: 1/3
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
-          Skip
-            Name: usearch_index
-            Description: usearch GRANULARITY 1
-            Parts: 1/1
-            Granules: 3/3
-Reference ARRAYs with non-matching dimension are rejected
-Special case: MaximumDistance is negative
-WHERE type, L2Distance
-Special case: setting max_limit_for_ann_queries
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
---- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 3/3
-      Skip
-        Name: usearch_index
-        Description: usearch GRANULARITY 1
-        Parts: 1/1
-        Granules: 1/3
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 3/3
-          Skip
-            Name: usearch_index
-            Description: usearch GRANULARITY 1
-            Parts: 1/1
-            Granules: 3/3
---- Test non-default metric (cosine distance) ---
---- Test with Array, GRANULARITY = 2, index_granularity = 4 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 4/4
-      Skip
-        Name: usearch_index
-        Description: usearch GRANULARITY 2
-        Parts: 0/1
-        Granules: 0/4
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 4/4
-          Skip
-            Name: usearch_index
-            Description: usearch GRANULARITY 2
-            Parts: 1/1
-            Granules: 2/4
---- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
-WHERE type, L2Distance, check that index is used
-Expression ((Projection + Before ORDER BY))
-  Limit (preliminary LIMIT (without OFFSET))
-    ReadFromMergeTree (default.tab)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 4/4
-      Skip
-        Name: usearch_index
-        Description: usearch GRANULARITY 4
-        Parts: 0/1
-        Granules: 0/4
-ORDER BY type, L2Distance, check that index is used
-Expression (Projection)
-  Limit (preliminary LIMIT (without OFFSET))
-    Sorting (Sorting for ORDER BY)
-      Expression (Before ORDER BY)
-        ReadFromMergeTree (default.tab)
-        Indexes:
-          PrimaryKey
-            Condition: true
-            Parts: 1/1
-            Granules: 4/4
-          Skip
-            Name: usearch_index
-            Description: usearch GRANULARITY 4
-            Parts: 1/1
-            Granules: 1/4
---- Test correctness of Usearch index with > 1 mark
-1	[1,0,0,0]
-9000	[9000,0,0,0]
-1	(1,0,0,0)
-9000	(9000,0,0,0)
---- Test quantization ---
-1	[0,0,10]
-2	[0,0,10.5]
-3	[0,0,9.5]
---- Bugs ---
diff --git a/tests/queries/0_stateless/02354_usearch_index.sql b/tests/queries/0_stateless/02354_usearch_index.sql
deleted file mode 100644
index fc2954d6c5d..00000000000
--- a/tests/queries/0_stateless/02354_usearch_index.sql
+++ /dev/null
@@ -1,290 +0,0 @@
--- Tags: no-fasttest
--- no-fasttest because needs usearch lib
-
-SET allow_experimental_usearch_index = 1;
-SET allow_experimental_analyzer = 0;
-
-SELECT '--- Negative tests ---';
-
-DROP TABLE IF EXISTS tab;
-
--- must have at most 2 arguments
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-
--- first argument (distance_function) must be String
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
-
--- must be created on single column
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index (vector, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
-
--- reject unsupported distance functions
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('wormholeDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
-
--- reject unsupported distance functions
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('L2Distance', 'invalid')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
-
--- must be created on Array/Tuple(Float32) columns
-SET allow_suspicious_low_cardinality_types = 1;
-CREATE TABLE tab(id Int32, vector Float32, INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Array(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Tuple(Float64), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector LowCardinality(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-CREATE TABLE tab(id Int32, vector Nullable(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
-
-SELECT '--- Test default GRANULARITY (should be 100 mio. for usearch)---';
-
-CREATE TABLE tab (id Int32, vector Array(Float32), INDEX usearch_index(vector) TYPE usearch) ENGINE=MergeTree ORDER BY id;
-SHOW CREATE TABLE tab;
-DROP TABLE tab;
-
-CREATE TABLE tab (id Int32, vector Array(Float32)) ENGINE=MergeTree ORDER BY id;
-ALTER TABLE tab ADD INDEX usearch_index(vector) TYPE usearch;
-SHOW CREATE TABLE tab;
-
-DROP TABLE tab;
-
-
-SELECT '--- Test with Array, GRANULARITY = 1, index_granularity = 5 ---';
-
-DROP TABLE IF EXISTS tab;
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
-
--- rows = 15, index_granularity = 5, GRANULARITY = 1 gives 3 usearch-indexed blocks (each comprising a single granule)
--- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 1/3"
-
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
-LIMIT 3;
-
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
-LIMIT 3;
-
--- Test special cases. Corresponding special case tests are omitted from later tests.
-
-SELECT 'Reference ARRAYs with non-matching dimension are rejected';
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [0.0, 0.0])
-LIMIT 3; -- { serverError INCORRECT_QUERY }
-
-SELECT 'Special case: MaximumDistance is negative';
-SELECT 'WHERE type, L2Distance';
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < -1.0
-LIMIT 3; -- { serverError INCORRECT_QUERY }
-
-SELECT 'Special case: setting max_limit_for_ann_queries';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [5.3, 7.3, 2.1])
-LIMIT 3
-SETTINGS max_limit_for_ann_queries=2; -- doesn't use the ann index
-
-DROP TABLE tab;
-
--- Test Tuple embeddings. Triggers different logic than Array inside MergeTreeIndexUSearch but the same logic as Array above MergeTreeIndexusearch.
--- Therefore test Tuple case just once.
-
-SELECT '--- Test with Tuple, GRANULARITY = 1, index_granularity = 5 ---';
-
-CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, (0.0, 0.0, 10.0)), (2, (0.0, 0.0, 10.5)), (3, (0.0, 0.0, 9.5)), (4, (0.0, 0.0, 9.7)), (5, (0.0, 0.0, 10.2)), (6, (10.0, 0.0, 0.0)), (7, (9.5, 0.0, 0.0)), (8, (9.7, 0.0, 0.0)), (9, (10.2, 0.0, 0.0)), (10, (10.5, 0.0, 0.0)), (11, (0.0, 10.0, 0.0)), (12, (0.0, 9.5, 0.0)), (13, (0.0, 9.7, 0.0)), (14, (0.0, 10.2, 0.0)), (15, (0.0, 10.5, 0.0));
-
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, (0.0, 0.0, 10.0)) < 1.0
-LIMIT 3;
-
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, (0.0, 0.0, 10.0))
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (0.0, 0.0, 10.0))
-LIMIT 3;
-
-DROP TABLE tab;
-
--- Not a systematic test, just to make sure no bad things happen
-SELECT '--- Test non-default metric (cosine distance) ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('cosineDistance') GRANULARITY 1) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 5;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
-
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
--- LIMIT 3;
-
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [0.0, 0.0, 10.0])
--- LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Test with Array, GRANULARITY = 2, index_granularity = 4 ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]);
-
--- rows = 16, index_granularity = 4, GRANULARITY = 2 gives 2 usearch-indexed blocks (each comprising two granules)
--- condition 'L2Distance(vector, reference_vector) < 1.0' ensures that only one usearch-indexed block produces results --> "Granules: 2/4"
-
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
-LIMIT 3;
-
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
-LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch() GRANULARITY 4) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 4;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0, 0.0]), (2, [0.0, 0.0, 10.5, 0.0]), (3, [0.0, 0.0, 9.5, 0.0]), (4, [0.0, 0.0, 9.7, 0.0]), (5, [10.0, 0.0, 0.0, 0.0]), (6, [9.5, 0.0, 0.0, 0.0]), (7, [9.7, 0.0, 0.0, 0.0]), (8, [10.2, 0.0, 0.0, 0.0]), (9, [0.0, 10.0, 0.0, 0.0]), (10, [0.0, 9.5, 0.0, 0.0]), (11, [0.0, 9.7, 0.0, 0.0]), (12, [0.0, 9.7, 0.0, 0.0]), (13, [0.0, 0.0, 0.0, 10.3]), (14, [0.0, 0.0, 0.0, 9.5]), (15, [0.0, 0.0, 0.0, 10.0]), (16, [0.0, 0.0, 0.0, 10.5]);
-
--- rows = 16, index_granularity = 4, GRANULARITY = 4 gives a single usearch-indexed block (comprising all granules)
--- no two matches happen to be located in the same granule, so with LIMIT = 3, we'll get "Granules: 2/4"
-
--- SELECT 'WHERE type, L2Distance';
--- SELECT *
--- FROM tab
--- WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
--- LIMIT 3;
-
-SELECT 'WHERE type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [10.0, 0.0, 10.0, 0.0]) < 5.0
-LIMIT 3;
-
--- SELECT 'ORDER BY type, L2Distance';
--- SELECT *
--- FROM tab
--- ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
--- LIMIT 3;
-
-SELECT 'ORDER BY type, L2Distance, check that index is used';
-EXPLAIN indexes=1
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [10.0, 0.0, 10.0, 0.0])
-LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Test correctness of Usearch index with > 1 mark';
-
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug
-INSERT INTO tab SELECT number, [toFloat32(number), 0., 0., 0.] from numbers(10000);
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [1.0, 0.0, 0.0, 0.0])
-LIMIT 1;
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, [9000.0, 0.0, 0.0, 0.0])
-LIMIT 1;
-
-DROP TABLE tab;
-
-CREATE TABLE tab(id Int32, vector Tuple(Float32, Float32, Float32, Float32), INDEX usearch_index vector TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; -- disable adaptive granularity due to bug
-INSERT INTO tab SELECT number, (toFloat32(number), 0., 0., 0.) from numbers(10000);
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (1.0, 0.0, 0.0, 0.0))
-LIMIT 1;
-
-SELECT *
-FROM tab
-ORDER BY L2Distance(vector, (9000.0, 0.0, 0.0, 0.0))
-LIMIT 1;
-
-DROP TABLE tab;
-
-SELECT '--- Test quantization ---';
-
-DROP TABLE IF EXISTS tab;
-CREATE TABLE tab(id Int32, vector Array(Float32), INDEX usearch_index vector TYPE usearch('L2Distance', 'f16')) ENGINE = MergeTree ORDER BY id;
-INSERT INTO tab VALUES (1, [0.0, 0.0, 10.0]), (2, [0.0, 0.0, 10.5]), (3, [0.0, 0.0, 9.5]), (4, [0.0, 0.0, 9.7]), (5, [0.0, 0.0, 10.2]), (6, [10.0, 0.0, 0.0]), (7, [9.5, 0.0, 0.0]), (8, [9.7, 0.0, 0.0]), (9, [10.2, 0.0, 0.0]), (10, [10.5, 0.0, 0.0]), (11, [0.0, 10.0, 0.0]), (12, [0.0, 9.5, 0.0]), (13, [0.0, 9.7, 0.0]), (14, [0.0, 10.2, 0.0]), (15, [0.0, 10.5, 0.0]);
-
-SELECT *
-FROM tab
-WHERE L2Distance(vector, [0.0, 0.0, 10.0]) < 1.0
-LIMIT 3;
-
-DROP TABLE tab;
-
-SELECT '--- Bugs ---';
-
--- Arrays with default values are rejected, issue #52258
-CREATE TABLE tab (`uuid` String, `vector` Array(Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid);
-INSERT INTO tab (uuid, version) VALUES ('1', 3); -- { serverError INCORRECT_DATA }
-DROP TABLE tab;
-
--- Tuples with default value work
-CREATE TABLE tab (`uuid` String, `vector` Tuple(Float32, Float32), `version` UInt32, INDEX idx vector TYPE usearch()) ENGINE = MergeTree() ORDER BY (uuid);
-INSERT INTO tab (uuid, version) VALUES ('1', 3); -- works fine
-DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.reference b/tests/queries/0_stateless/02354_vector_search_bugs.reference
new file mode 100644
index 00000000000..d2c2d7e2fb7
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_bugs.reference
@@ -0,0 +1,17 @@
+Issue #52258: Empty Arrays or Arrays with default values are rejected
+- Annoy
+- Usearch
+It is possible to create parts with different Array vector sizes but there will be an error at query time
+- Annoy
+- Usearch
+Correctness of index with > 1 mark
+- Annoy
+1	[1,0]	0
+9000	[9000,0]	0
+1	(1,0)	0
+9000	(9000,0)	0
+- Usearch
+1	[1,0]	0
+9000	[9000,0]	0
+1	(1,0)	0
+9000	(9000,0)	0
diff --git a/tests/queries/0_stateless/02354_vector_search_bugs.sql b/tests/queries/0_stateless/02354_vector_search_bugs.sql
new file mode 100644
index 00000000000..db0c06c4e16
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_bugs.sql
@@ -0,0 +1,145 @@
+-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan
+
+-- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage,
+-- therefore they are tested in a single file.
+
+-- This file contains tests for various bugs and special cases
+
+SET allow_experimental_annoy_index = 1;
+SET allow_experimental_usearch_index = 1;
+
+SET allow_experimental_analyzer = 1; -- 0 vs. 1 produce slightly different error codes, make it future-proof
+
+DROP TABLE IF EXISTS tab;
+
+SELECT 'Issue #52258: Empty Arrays or Arrays with default values are rejected';
+
+SELECT '- Annoy';
+
+CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree() ORDER BY (id);
+INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA }
+INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
+CREATE TABLE tab (id UInt64, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree() ORDER BY (id);
+INSERT INTO tab (id) VALUES (1); -- works fine, takes on default tuple (0.0, 0.0)
+DROP TABLE tab;
+
+SELECT '- Usearch';
+
+CREATE TABLE tab (id UInt64, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree() ORDER BY (id);
+INSERT INTO tab VALUES (1, []); -- { serverError INCORRECT_DATA }
+INSERT INTO tab (id) VALUES (1); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
+CREATE TABLE tab (id UInt64, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree() ORDER BY (id);
+INSERT INTO tab (id) VALUES (1); -- works fine, takes on default tuple (0.0, 0.0)
+DROP TABLE tab;
+
+SELECT 'It is possible to create parts with different Array vector sizes but there will be an error at query time';
+
+SELECT '- Annoy';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id;
+SYSTEM STOP MERGES tab;
+INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]);
+INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]);
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3; -- { serverError SIZES_OF_ARRAYS_DONT_MATCH }
+
+DROP TABLE tab;
+
+SELECT '- Usearch';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id;
+SYSTEM STOP MERGES tab;
+INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2]);
+INSERT INTO tab values (2, [2.2, 2.3, 2.4]) (3, [3.1, 3.2, 3.3]);
+
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3; -- { serverError SIZES_OF_ARRAYS_DONT_MATCH }
+
+DROP TABLE tab;
+
+SELECT 'Correctness of index with > 1 mark';
+
+SELECT '- Annoy';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug
+INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000);
+
+WITH [1.0, 0.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+WITH [9000.0, 0.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+DROP TABLE tab;
+
+-- same, but with Tuples
+CREATE TABLE tab(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug
+INSERT INTO tab SELECT number, (toFloat32(number), 0.0) from numbers(10000);
+
+WITH (1.0, 0.0) AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+WITH (9000.0, 0.0) AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+DROP TABLE tab;
+
+SELECT '- Usearch';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug
+INSERT INTO tab SELECT number, [toFloat32(number), 0.0] from numbers(10000);
+
+WITH [1.0, 0.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+WITH [9000.0, 0.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+DROP TABLE tab;
+
+-- same, but with Tuples
+CREATE TABLE tab(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity_bytes=0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity=8192; -- disable adaptive granularity due to bug
+INSERT INTO tab SELECT number, (toFloat32(number), 0.0) from numbers(10000);
+
+WITH (1.0, 0.0) AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+WITH (9000.0, 0.0) AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 1;
+
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.reference b/tests/queries/0_stateless/02354_vector_search_default_granularity.reference
new file mode 100644
index 00000000000..2f97ed72c52
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.reference
@@ -0,0 +1,7 @@
+Test the default index granularity for vector search indexes (CREATE TABLE AND ALTER TABLE), should be 100 million for Annoy and USearch
+- Annoy
+100000000
+100000000
+- Usearch
+100000000
+100000000
diff --git a/tests/queries/0_stateless/02354_vector_search_default_granularity.sql b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql
new file mode 100644
index 00000000000..f15554505f8
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_default_granularity.sql
@@ -0,0 +1,35 @@
+-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan
+
+-- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage,
+-- therefore they are tested in a single file.
+
+-- This file contains tests for the non-standard default granularity of vector search indexes.
+
+SET allow_experimental_annoy_index = 1;
+SET allow_experimental_usearch_index = 1;
+
+SELECT 'Test the default index granularity for vector search indexes (CREATE TABLE AND ALTER TABLE), should be 100 million for Annoy and USearch';
+
+SELECT '- Annoy';
+
+DROP TABLE IF EXISTS tab;
+CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE annoy) ENGINE=MergeTree ORDER BY id;
+SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx';
+
+DROP TABLE tab;
+CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE=MergeTree ORDER BY id;
+ALTER TABLE tab ADD INDEX idx(vec) TYPE annoy;
+SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx';
+
+SELECT '- Usearch';
+
+DROP TABLE tab;
+CREATE TABLE tab (id Int32, vec Array(Float32), INDEX idx(vec) TYPE usearch) ENGINE=MergeTree ORDER BY id;
+SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx';
+
+DROP TABLE tab;
+CREATE TABLE tab (id Int32, vec Array(Float32)) ENGINE=MergeTree ORDER BY id;
+ALTER TABLE tab ADD INDEX idx(vec) TYPE usearch;
+SELECT granularity FROM system.data_skipping_indices WHERE database = currentDatabase() AND table = 'tab' AND name = 'idx';
+
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference
new file mode 100644
index 00000000000..43bc49e8adc
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.reference
@@ -0,0 +1,8 @@
+At most two index arguments
+1st argument (distance function) must be String
+Rejects unsupported distance functions
+2nd argument (Annoy: number of trees, USearch: scalar kind) must be UInt64 (Annoy) / String (Usearch)
+Rejects unsupported scalar kinds (only Usearch)
+Must be created on single column
+Must be created on Array(Float32) or Tuple(Float32, Float, ...) columns
+Rejects INSERTs of Arrays with different sizes
diff --git a/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql
new file mode 100644
index 00000000000..6a4d6448629
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_index_creation_negative.sql
@@ -0,0 +1,63 @@
+-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan
+
+-- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage,
+-- therefore they are tested in a single file.
+
+-- This file tests that various conditions are checked during creation of vector search indexes.
+
+SET allow_experimental_annoy_index = 1;
+SET allow_experimental_usearch_index = 1;
+
+DROP TABLE IF EXISTS tab;
+
+SELECT 'At most two index arguments';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('too', 'many', 'arguments')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('too', 'many', 'args')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+
+SELECT '1st argument (distance function) must be String';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+
+SELECT 'Rejects unsupported distance functions';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('invalidDistance')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
+
+SELECT '2nd argument (Annoy: number of trees, USearch: scalar kind) must be UInt64 (Annoy) / String (Usearch)';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy('L2Distance', 'not an UInt64')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch(3)) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_QUERY }
+
+SELECT 'Rejects unsupported scalar kinds (only Usearch)';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch('L2Distance', 'invalidKind')) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_DATA }
+
+SELECT 'Must be created on single column';
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx (vec, id) TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError INCORRECT_NUMBER_OF_COLUMNS }
+
+SELECT 'Must be created on Array(Float32) or Tuple(Float32, Float, ...) columns';
+
+SET allow_suspicious_low_cardinality_types = 1;
+
+CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec Float32, INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+
+CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec Array(Float64), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+
+CREATE TABLE tab(id Int32, vec Tuple(Float64), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec Tuple(Float64), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+
+CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec LowCardinality(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+
+CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+CREATE TABLE tab(id Int32, vec Nullable(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id; -- { serverError ILLEGAL_COLUMN }
+
+SELECT 'Rejects INSERTs of Arrays with different sizes';
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id;
+INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
+
+CREATE TABLE tab(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id;
+INSERT INTO tab values (0, [2.2, 2.3]) (1, [3.1, 3.2, 3.3]); -- { serverError INCORRECT_DATA }
+DROP TABLE tab;
diff --git a/tests/queries/0_stateless/02354_vector_search_queries.reference b/tests/queries/0_stateless/02354_vector_search_queries.reference
new file mode 100644
index 00000000000..befa6af5a08
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_queries.reference
@@ -0,0 +1,196 @@
+ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block
+- Annoy: WHERE-type
+5	[0,2]	0
+6	[0,2.1]	0.09999990463256836
+7	[0,2.2]	0.20000004768371582
+- Annoy: ORDER-BY-type
+5	[0,2]	0
+6	[0,2.1]	0.09999990463256836
+7	[0,2.2]	0.20000004768371582
+- Usearch: WHERE-type
+5	[0,2]	0
+6	[0,2.1]	0.09999990463256836
+7	[0,2.2]	0.20000004768371582
+- Usearch: ORDER-BY-type
+5	[0,2]	0
+6	[0,2.1]	0.09999990463256836
+7	[0,2.2]	0.20000004768371582
+- Annoy: WHERE-type, EXPLAIN
+Expression ((Projection + Before ORDER BY))
+  Limit (preliminary LIMIT (without OFFSET))
+    ReadFromMergeTree (default.tab_annoy)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 1/1
+      Skip
+        Name: idx
+        Description: annoy GRANULARITY 100000000
+        Parts: 1/1
+        Granules: 1/1
+- Annoy: ORDER-BY-type, EXPLAIN
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_annoy)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 1/1
+          Skip
+            Name: idx
+            Description: annoy GRANULARITY 100000000
+            Parts: 1/1
+            Granules: 1/1
+- Usearch: WHERE-type, EXPLAIN
+Expression ((Projection + Before ORDER BY))
+  Limit (preliminary LIMIT (without OFFSET))
+    ReadFromMergeTree (default.tab_usearch)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 1/1
+      Skip
+        Name: idx
+        Description: usearch GRANULARITY 100000000
+        Parts: 1/1
+        Granules: 1/1
+- Usearch: ORDER-BY-type, EXPLAIN
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_usearch)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 1/1
+          Skip
+            Name: idx
+            Description: usearch GRANULARITY 100000000
+            Parts: 1/1
+            Granules: 1/1
+ARRAY vectors, 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block
+- Annoy: WHERE-type
+6	[0,2]	0
+7	[0,2.1]	0.09999990463256836
+8	[0,2.2]	0.20000004768371582
+- Annoy: ORDER-BY-type
+6	[0,2]	0
+7	[0,2.1]	0.09999990463256836
+8	[0,2.2]	0.20000004768371582
+- Usearch: WHERE-type
+6	[0,2]	0
+7	[0,2.1]	0.09999990463256836
+8	[0,2.2]	0.20000004768371582
+- Usearch: ORDER-BY-type
+6	[0,2]	0
+7	[0,2.1]	0.09999990463256836
+8	[0,2.2]	0.20000004768371582
+- Annoy: WHERE-type, EXPLAIN
+Expression ((Projection + Before ORDER BY))
+  Limit (preliminary LIMIT (without OFFSET))
+    ReadFromMergeTree (default.tab_annoy)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 4/4
+      Skip
+        Name: idx
+        Description: annoy GRANULARITY 2
+        Parts: 1/1
+        Granules: 1/4
+- Annoy: ORDER-BY-type, EXPLAIN
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_annoy)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+          Skip
+            Name: idx
+            Description: annoy GRANULARITY 2
+            Parts: 1/1
+            Granules: 2/4
+- Usearch: WHERE-type, EXPLAIN
+Expression ((Projection + Before ORDER BY))
+  Limit (preliminary LIMIT (without OFFSET))
+    ReadFromMergeTree (default.tab_usearch)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 4/4
+      Skip
+        Name: idx
+        Description: usearch GRANULARITY 2
+        Parts: 1/1
+        Granules: 1/4
+- Usearch: ORDER-BY-type, EXPLAIN
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_usearch)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+          Skip
+            Name: idx
+            Description: usearch GRANULARITY 2
+            Parts: 1/1
+            Granules: 2/4
+TUPLE vectors and special cases
+- Annoy: WHERE-type
+0	(4.6,2.3)	0.5527864045000421
+1	(2,3.2)	0.15200169244542905
+2	(4.2,3.4)	0.37080174340866845
+- Annoy: ORDER-BY-type
+6	(1,9.3)	0.005731362878640178
+1	(2,3.2)	0.15200169244542905
+7	(5.5,4.7)	0.3503476876550442
+- Usearch: WHERE-type
+0	(4.6,2.3)	0.5527864045000421
+1	(2,3.2)	0.15200169244542905
+2	(4.2,3.4)	0.37080174340866845
+- Usearch: ORDER-BY-type
+6	(1,9.3)	0.005731362878640178
+1	(2,3.2)	0.15200169244542905
+7	(5.5,4.7)	0.3503476876550442
+- Special case: MaximumDistance is negative
+- Special case: MaximumDistance is negative
+- Special case: setting "annoy_index_search_k_nodes"
+- Special case: setting "max_limit_for_ann_queries"
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_annoy)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
+- Special case: setting "max_limit_for_ann_queries"
+Expression (Projection)
+  Limit (preliminary LIMIT (without OFFSET))
+    Sorting (Sorting for ORDER BY)
+      Expression (Before ORDER BY)
+        ReadFromMergeTree (default.tab_usearch)
+        Indexes:
+          PrimaryKey
+            Condition: true
+            Parts: 1/1
+            Granules: 4/4
diff --git a/tests/queries/0_stateless/02354_vector_search_queries.sql b/tests/queries/0_stateless/02354_vector_search_queries.sql
new file mode 100644
index 00000000000..2ccf869fdba
--- /dev/null
+++ b/tests/queries/0_stateless/02354_vector_search_queries.sql
@@ -0,0 +1,240 @@
+-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-ordinary-database, no-asan
+
+-- Tests vector search in ClickHouse, i.e. Annoy and Usearch indexes. Both index types share similarities in implementation and usage,
+-- therefore they are tested in a single file.
+
+-- This file tests various simple approximate nearest neighborhood (ANN) queries that utilize vector search indexes.
+
+SET allow_experimental_annoy_index = 1;
+SET allow_experimental_usearch_index = 1;
+
+SET allow_experimental_analyzer = 0;
+
+SELECT 'ARRAY, 10 rows, index_granularity = 8192, GRANULARITY = 1 million --> 1 granule, 1 indexed block';
+
+DROP TABLE IF EXISTS tab_annoy;
+DROP TABLE IF EXISTS tab_usearch;
+
+CREATE TABLE tab_annoy(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;
+INSERT INTO tab_annoy VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);
+
+CREATE TABLE tab_usearch(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch()) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192;
+INSERT INTO tab_usearch VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [0.0, 2.0]), (6, [0.0, 2.1]), (7, [0.0, 2.2]), (8, [0.0, 2.3]), (9, [0.0, 2.4]);
+
+
+SELECT '- Annoy: WHERE-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Annoy: ORDER-BY-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Usearch: WHERE-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Usearch: ORDER-BY-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Annoy: WHERE-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Annoy: ORDER-BY-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Usearch: WHERE-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Usearch: ORDER-BY-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+DROP TABLE tab_annoy;
+DROP TABLE tab_usearch;
+
+
+SELECT 'ARRAY vectors, 12 rows, index_granularity = 3, GRANULARITY = 2 --> 4 granules, 2 indexed block';
+
+CREATE TABLE tab_annoy(id Int32, vec Array(Float32), INDEX idx vec TYPE annoy() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab_annoy VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]);
+
+CREATE TABLE tab_usearch(id Int32, vec Array(Float32), INDEX idx vec TYPE usearch() GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab_usearch VALUES (0, [1.0, 0.0]), (1, [1.1, 0.0]), (2, [1.2, 0.0]), (3, [1.3, 0.0]), (4, [1.4, 0.0]), (5, [1.5, 0.0]), (6, [0.0, 2.0]), (7, [0.0, 2.1]), (8, [0.0, 2.2]), (9, [0.0, 2.3]), (10, [0.0, 2.4]), (11, [0.0, 2.5]);
+
+SELECT '- Annoy: WHERE-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Annoy: ORDER-BY-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Usearch: WHERE-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Usearch: ORDER-BY-type';
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Annoy: WHERE-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Annoy: ORDER-BY-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Usearch: WHERE-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+WHERE L2Distance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Usearch: ORDER-BY-type, EXPLAIN';
+EXPLAIN indexes=1
+WITH [0.0, 2.0] AS reference_vec
+SELECT id, vec, L2Distance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY L2Distance(vec, reference_vec)
+LIMIT 3;
+
+DROP TABLE tab_annoy;
+DROP TABLE tab_usearch;
+
+
+SELECT 'TUPLE vectors and special cases';
+-- Not a systematic test, just to check that no bad things happen.
+-- Just for jun, use metric = 'cosineDistance' (Annoy/Usearch), tree_count = 200 (Annoy), scalarKind = 'f64' (Usearch)
+
+CREATE TABLE tab_annoy(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE annoy('cosineDistance', 200) GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab_annoy VALUES (0, (4.6, 2.3)), (1, (2.0, 3.2)), (2, (4.2, 3.4)), (3, (5.3, 2.9)), (4, (2.4, 5.2)), (5, (5.3, 2.3)), (6, (1.0, 9.3)), (7, (5.5, 4.7)), (8, (6.4, 3.5)), (9, (5.3, 2.5)), (10, (6.4, 3.4)), (11, (6.4, 3.2));
+
+CREATE TABLE tab_usearch(id Int32, vec Tuple(Float32, Float32), INDEX idx vec TYPE usearch('cosineDistance', 'f64') GRANULARITY 2) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 3;
+INSERT INTO tab_usearch VALUES (0, (4.6, 2.3)), (1, (2.0, 3.2)), (2, (4.2, 3.4)), (3, (5.3, 2.9)), (4, (2.4, 5.2)), (5, (5.3, 2.3)), (6, (1.0, 9.3)), (7, (5.5, 4.7)), (8, (6.4, 3.5)), (9, (5.3, 2.5)), (10, (6.4, 3.4)), (11, (6.4, 3.2));
+
+SELECT '- Annoy: WHERE-type';
+WITH (0.0, 2.0) AS reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_annoy
+WHERE cosineDistance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Annoy: ORDER-BY-type';
+WITH (0.0, 2.0) AS reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY cosineDistance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Usearch: WHERE-type';
+WITH (0.0, 2.0) AS reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_usearch
+WHERE cosineDistance(vec, reference_vec) < 1.0
+LIMIT 3;
+
+SELECT '- Usearch: ORDER-BY-type';
+WITH (0.0, 2.0) AS reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY cosineDistance(vec, reference_vec)
+LIMIT 3;
+
+SELECT '- Special case: MaximumDistance is negative';
+WITH (0.0, 2.0) as reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_annoy
+WHERE cosineDistance(vec, reference_vec) < -1.0
+LIMIT 3; -- { serverError INCORRECT_QUERY }
+
+SELECT '- Special case: MaximumDistance is negative';
+WITH (0.0, 2.0) as reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_usearch
+WHERE cosineDistance(vec, reference_vec) < -1.0
+LIMIT 3; -- { serverError INCORRECT_QUERY }
+
+SELECT '- Special case: setting "annoy_index_search_k_nodes"';
+WITH (0.0, 2.0) as reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY cosineDistance(vec, reference_vec)
+LIMIT 3
+SETTINGS annoy_index_search_k_nodes=0; -- searches zero nodes --> no results
+
+SELECT '- Special case: setting "max_limit_for_ann_queries"';
+EXPLAIN indexes=1
+WITH (0.0, 2.0) as reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_annoy
+ORDER BY cosineDistance(vec, reference_vec)
+LIMIT 3
+SETTINGS max_limit_for_ann_queries=2; -- LIMIT 3 > 2 --> don't use the ann index
+
+SELECT '- Special case: setting "max_limit_for_ann_queries"';
+EXPLAIN indexes=1
+WITH (0.0, 2.0) as reference_vec
+SELECT id, vec, cosineDistance(vec, reference_vec)
+FROM tab_usearch
+ORDER BY cosineDistance(vec, reference_vec)
+LIMIT 3
+SETTINGS max_limit_for_ann_queries=2; -- LIMIT 3 > 2 --> don't use the ann index
+
+DROP TABLE tab_annoy;
+DROP TABLE tab_usearch;

From a0862531cbaea2b088668cbcd2f3238439e00dd8 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 21 Sep 2023 09:53:52 +0000
Subject: [PATCH 203/243] Fix clang-17 build

---
 src/Server/KeeperTCPHandler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp
index 84ed7388503..742300f9b2e 100644
--- a/src/Server/KeeperTCPHandler.cpp
+++ b/src/Server/KeeperTCPHandler.cpp
@@ -382,9 +382,9 @@ void KeeperTCPHandler::runImpl()
     }
 
     auto response_fd = poll_wrapper->getResponseFD();
-    auto response_callback = [responses = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response)
+    auto response_callback = [responses_ = this->responses, response_fd](const Coordination::ZooKeeperResponsePtr & response)
     {
-        if (!responses->push(response))
+        if (!responses_->push(response))
             throw Exception(ErrorCodes::SYSTEM_ERROR,
                 "Could not push response with xid {} and zxid {}",
                 response->xid,

From 8fdad7f2685bdc4c144ed4b4915e01777869ff47 Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Thu, 21 Sep 2023 11:17:00 +0000
Subject: [PATCH 204/243] Remove timeout command

---
 .../0_stateless/02703_keeper_map_concurrent_create_drop.sh   | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh
index 3964427895c..17d1fa92377 100755
--- a/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh
+++ b/tests/queries/0_stateless/02703_keeper_map_concurrent_create_drop.sh
@@ -15,7 +15,8 @@ function create_drop_loop()
     done
 
     i=0
-    while true;
+    local TIMELIMIT=$((SECONDS+$2))
+    while [ $SECONDS -lt "$TIMELIMIT" ];
     do
         $CLICKHOUSE_CLIENT --query="CREATE TABLE IF NOT EXISTS $table_name (key UInt64, value UInt64) ENGINE = KeeperMap('/02703_keeper_map/$CLICKHOUSE_DATABASE') PRIMARY KEY(key)"
         $CLICKHOUSE_CLIENT --query="INSERT INTO $table_name VALUES ($1, $i)"
@@ -40,7 +41,7 @@ TIMEOUT=30
 
 for i in `seq $THREADS`
 do
-    timeout $TIMEOUT bash -c "create_drop_loop $i" 2> /dev/null &
+    create_drop_loop $i $TIMEOUT 2> /dev/null &
 done
 
 wait

From d3f97d9b82129210eed8311ec95040e83305ade1 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Thu, 21 Sep 2023 14:22:15 +0200
Subject: [PATCH 205/243] Remove base64 contrib

---
 contrib/base64 | 1 -
 1 file changed, 1 deletion(-)
 delete mode 160000 contrib/base64

diff --git a/contrib/base64 b/contrib/base64
deleted file mode 160000
index 8628e258090..00000000000
--- a/contrib/base64
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11

From 853e3f0aa789d5b6dcb251a403276d9fdc02902c Mon Sep 17 00:00:00 2001
From: Pavel Novitskiy <106629323+pnovitskiy@users.noreply.github.com>
Date: Thu, 21 Sep 2023 15:20:18 +0200
Subject: [PATCH 206/243] Add drop table IF EMPTY (#48915)

---
 docs/en/sql-reference/statements/drop.md      |   3 +-
 src/Common/ErrorCodes.cpp                     |   2 +-
 src/Interpreters/InterpreterDropQuery.cpp     |  38 ++++++-
 src/Parsers/ASTDropQuery.cpp                  |   3 +
 src/Parsers/ASTDropQuery.h                    |   1 +
 src/Parsers/ParserDropQuery.cpp               |   9 ++
 src/Parsers/ParserDropQuery.h                 |   5 +-
 .../test_drop_if_empty/__init__.py            |   0
 .../configs/drop_if_empty_check.xml           |  14 +++
 .../configs/remote_servers.xml                |  17 +++
 .../test_drop_if_empty/configs/zookeeper.xml  |  19 ++++
 tests/integration/test_drop_if_empty/test.py  | 105 ++++++++++++++++++
 .../0_stateless/02716_drop_if_empty.reference |  10 ++
 .../0_stateless/02716_drop_if_empty.sql       |  22 ++++
 14 files changed, 243 insertions(+), 5 deletions(-)
 create mode 100644 tests/integration/test_drop_if_empty/__init__.py
 create mode 100644 tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml
 create mode 100644 tests/integration/test_drop_if_empty/configs/remote_servers.xml
 create mode 100644 tests/integration/test_drop_if_empty/configs/zookeeper.xml
 create mode 100644 tests/integration/test_drop_if_empty/test.py
 create mode 100644 tests/queries/0_stateless/02716_drop_if_empty.reference
 create mode 100644 tests/queries/0_stateless/02716_drop_if_empty.sql

diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index eed76dbcd5c..8ed00f625d6 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -21,6 +21,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
 ## DROP TABLE
 
 Deletes the table.
+In case when `IF EMPTY` clause is specified server will check if table is empty only on replica that received initial query.  
 
 :::tip
 Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
@@ -29,7 +30,7 @@ Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
 Syntax:
 
 ``` sql
-DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
+DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db.]name [ON CLUSTER cluster] [SYNC]
 ```
 
 ## DROP DICTIONARY
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index be2b0a7bd5e..a4837ac3c44 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -587,7 +587,7 @@
     M(702, CLIENT_INFO_DOES_NOT_MATCH) \
     M(703, INVALID_IDENTIFIER) \
     M(704, CANNOT_USE_QUERY_CACHE_WITH_NONDETERMINISTIC_FUNCTIONS) \
-    \
+    M(705, TABLE_NOT_EMPTY) \
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index 9dd28cbeaac..dd52b6c2e14 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -34,6 +34,7 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
     extern const int INCORRECT_QUERY;
     extern const int TABLE_IS_READ_ONLY;
+    extern const int TABLE_NOT_EMPTY;
 }
 
 namespace ActionLocks
@@ -55,7 +56,8 @@ InterpreterDropQuery::InterpreterDropQuery(const ASTPtr & query_ptr_, ContextMut
 BlockIO InterpreterDropQuery::execute()
 {
     auto & drop = query_ptr->as<ASTDropQuery &>();
-    if (!drop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
+
+    if (!drop.cluster.empty() && drop.table && !drop.if_empty && !maybeRemoveOnCluster(query_ptr, getContext()))
     {
         DDLQueryOnClusterParams params;
         params.access_to_check = getRequiredAccessForDDLOnCluster();
@@ -67,6 +69,12 @@ BlockIO InterpreterDropQuery::execute()
 
     if (drop.table)
         return executeToTable(drop);
+    else if (drop.database && !drop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
+    {
+            DDLQueryOnClusterParams params;
+            params.access_to_check = getRequiredAccessForDDLOnCluster();
+            return executeDDLQueryOnCluster(query_ptr, getContext(), params);
+    }
     else if (drop.database)
         return executeToDatabase(drop);
     else
@@ -122,6 +130,12 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
 
     if (database && table)
     {
+        const auto & settings = getContext()->getSettingsRef();
+        if (query.if_empty)
+        {
+            if (auto rows = table->totalRows(settings); rows > 0)
+                throw Exception(ErrorCodes::TABLE_NOT_EMPTY, "Table {} is not empty", backQuoteIfNeed(table_id.table_name));
+        }
         checkStorageSupportsTransactionsIfNeeded(table, context_);
 
         auto & ast_drop_query = query.as<ASTDropQuery &>();
@@ -151,6 +165,18 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
         else
             drop_storage = AccessType::DROP_TABLE;
 
+        auto new_query_ptr = query.clone();
+        auto & query_to_send = new_query_ptr->as<ASTDropQuery &>();
+
+        if (!query.cluster.empty() && !maybeRemoveOnCluster(new_query_ptr, getContext()))
+        {
+            query_to_send.if_empty = false;
+
+            DDLQueryOnClusterParams params;
+            params.access_to_check = getRequiredAccessForDDLOnCluster();
+            return executeDDLQueryOnCluster(new_query_ptr, getContext(), params);
+        }
+
         if (database->shouldReplicateQuery(getContext(), query_ptr))
         {
             if (query.kind == ASTDropQuery::Kind::Detach)
@@ -162,7 +188,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
 
             ddl_guard->releaseTableLock();
             table.reset();
-            return database->tryEnqueueReplicatedDDL(query.clone(), context_);
+
+            query_to_send.if_empty = false;
+
+            return database->tryEnqueueReplicatedDDL(new_query_ptr, context_);
         }
 
         if (query.kind == ASTDropQuery::Kind::Detach)
@@ -340,9 +369,13 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
             if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
                 throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
 
+            if (query.if_empty)
+                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
+
             if (database->hasReplicationThread())
                 database->stopReplication();
 
+
             if (database->shouldBeEmptyOnDetach())
             {
                 /// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
@@ -355,6 +388,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
                 if (truncate)
                     query_for_table.kind = ASTDropQuery::Kind::Drop;
                 query_for_table.if_exists = true;
+                query_for_table.if_empty = false;
                 query_for_table.setDatabase(database_name);
                 query_for_table.sync = query.sync;
 
diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp
index 93a4b547025..ad1294c6e71 100644
--- a/src/Parsers/ASTDropQuery.cpp
+++ b/src/Parsers/ASTDropQuery.cpp
@@ -60,6 +60,9 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState
     if (if_exists)
         settings.ostr << "IF EXISTS ";
 
+    if (if_empty)
+        settings.ostr << "IF EMPTY ";
+
     settings.ostr << (settings.hilite ? hilite_none : "");
 
     if (!table && database)
diff --git a/src/Parsers/ASTDropQuery.h b/src/Parsers/ASTDropQuery.h
index 05515ba4005..a732b354260 100644
--- a/src/Parsers/ASTDropQuery.h
+++ b/src/Parsers/ASTDropQuery.h
@@ -21,6 +21,7 @@ public:
 
     Kind kind;
     bool if_exists{false};
+    bool if_empty{false};
 
     /// Useful if we already have a DDL lock
     bool no_ddl_lock{false};
diff --git a/src/Parsers/ParserDropQuery.cpp b/src/Parsers/ParserDropQuery.cpp
index f40a39e6b2f..450c8a1afec 100644
--- a/src/Parsers/ParserDropQuery.cpp
+++ b/src/Parsers/ParserDropQuery.cpp
@@ -19,6 +19,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
     ParserKeyword s_database("DATABASE");
     ParserToken s_dot(TokenType::Dot);
     ParserKeyword s_if_exists("IF EXISTS");
+    ParserKeyword s_if_empty("IF EMPTY");
     ParserIdentifier name_p(true);
     ParserKeyword s_permanently("PERMANENTLY");
     ParserKeyword s_no_delay("NO DELAY");
@@ -28,6 +29,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
     ASTPtr table;
     String cluster_str;
     bool if_exists = false;
+    bool if_empty = false;
     bool temporary = false;
     bool is_dictionary = false;
     bool is_view = false;
@@ -39,6 +41,9 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
         if (s_if_exists.ignore(pos, expected))
             if_exists = true;
 
+        if (s_if_empty.ignore(pos, expected))
+            if_empty = true;
+
         if (!name_p.parse(pos, database, expected))
             return false;
     }
@@ -60,6 +65,9 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
         if (s_if_exists.ignore(pos, expected))
             if_exists = true;
 
+        if (s_if_empty.ignore(pos, expected))
+            if_empty = true;
+
         if (!name_p.parse(pos, table, expected))
             return false;
 
@@ -90,6 +98,7 @@ bool parseDropQuery(IParser::Pos & pos, ASTPtr & node, Expected & expected, cons
 
     query->kind = kind;
     query->if_exists = if_exists;
+    query->if_empty = if_empty;
     query->temporary = temporary;
     query->is_dictionary = is_dictionary;
     query->is_view = is_view;
diff --git a/src/Parsers/ParserDropQuery.h b/src/Parsers/ParserDropQuery.h
index 39ff5b7c4fd..f77b477a2fd 100644
--- a/src/Parsers/ParserDropQuery.h
+++ b/src/Parsers/ParserDropQuery.h
@@ -8,7 +8,10 @@ namespace DB
 {
 
 /** Query like this:
-  * DROP|DETACH|TRUNCATE TABLE [IF EXISTS] [db.]name [PERMANENTLY]
+  * DROP TABLE [IF EXISTS|EMPTY] [db.]name [PERMANENTLY]
+  *
+  * Or:
+  * DETACH|TRUNCATE TABLE [IF EXISTS] [db.]name [PERMANENTLY]
   *
   * Or:
   * DROP DATABASE [IF EXISTS] db
diff --git a/tests/integration/test_drop_if_empty/__init__.py b/tests/integration/test_drop_if_empty/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml b/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml
new file mode 100644
index 00000000000..34a78ac9919
--- /dev/null
+++ b/tests/integration/test_drop_if_empty/configs/drop_if_empty_check.xml
@@ -0,0 +1,14 @@
+<clickhouse>
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <profiles>
+        <default>
+            <allow_experimental_database_replicated>1</allow_experimental_database_replicated>
+        </default>
+    </profiles>
+    <users>
+        <default>
+            <profile>default</profile>
+        </default>
+    </users>
+</clickhouse>
diff --git a/tests/integration/test_drop_if_empty/configs/remote_servers.xml b/tests/integration/test_drop_if_empty/configs/remote_servers.xml
new file mode 100644
index 00000000000..34fda471187
--- /dev/null
+++ b/tests/integration/test_drop_if_empty/configs/remote_servers.xml
@@ -0,0 +1,17 @@
+<clickhouse>
+    <remote_servers>
+        <cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster>
+    </remote_servers>
+</clickhouse>
+
diff --git a/tests/integration/test_drop_if_empty/configs/zookeeper.xml b/tests/integration/test_drop_if_empty/configs/zookeeper.xml
new file mode 100644
index 00000000000..d23bf1625ce
--- /dev/null
+++ b/tests/integration/test_drop_if_empty/configs/zookeeper.xml
@@ -0,0 +1,19 @@
+
+<clickhouse>
+    <zookeeper>
+        <node index="1">
+            <host>zoo1</host>
+            <port>2181</port>
+        </node>
+        <node index="2">
+            <host>zoo2</host>
+            <port>2181</port>
+        </node>
+            <node index="3">
+            <host>zoo3</host>
+            <port>2181</port>
+        </node>
+        <session_timeout_ms>20000</session_timeout_ms>
+    </zookeeper>
+</clickhouse>
+    
diff --git a/tests/integration/test_drop_if_empty/test.py b/tests/integration/test_drop_if_empty/test.py
new file mode 100644
index 00000000000..d96936eb826
--- /dev/null
+++ b/tests/integration/test_drop_if_empty/test.py
@@ -0,0 +1,105 @@
+import re
+import time
+
+import pytest
+import requests
+from helpers.cluster import ClickHouseCluster
+from helpers.client import QueryRuntimeException
+
+cluster = ClickHouseCluster(__file__, zookeeper_config_path="configs/zookeeper.xml")
+
+main_configs = [
+    "configs/remote_servers.xml",
+]
+
+node1 = cluster.add_instance(
+    "node1",
+    main_configs=main_configs + ["configs/drop_if_empty_check.xml"],
+    macros={"replica": "node1", "shard": "shard1"},
+    with_zookeeper=True,
+)
+
+node2 = cluster.add_instance(
+    "node2",
+    main_configs=main_configs + ["configs/drop_if_empty_check.xml"],
+    macros={"replica": "node2", "shard": "shard1"},
+    with_zookeeper=True,
+)
+
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_drop_if_empty(start_cluster):
+    settings = {
+        "allow_experimental_database_replicated": 1,
+    }
+    node1.query(
+        "CREATE DATABASE replicateddb "
+        "ENGINE = Replicated('/clickhouse/databases/replicateddb', 'shard1', 'node1')",
+        settings=settings,
+    )
+    node2.query(
+        "CREATE DATABASE replicateddb "
+        "ENGINE = Replicated('/clickhouse/databases/replicateddb', 'shard1', 'node2')",
+        settings=settings,
+    )
+    node1.query(
+        "CREATE TABLE default.tbl ON CLUSTER 'cluster' ("
+        "x UInt64"
+        ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')"
+        "ORDER BY x"
+    )
+    node1.query(
+        "CREATE TABLE replicateddb.tbl2 (" "x UInt64" ") ENGINE=MergeTree " "ORDER BY x"
+    )
+
+    assert 1 == int(
+        node2.query("SELECT count() FROM system.tables WHERE name = 'tbl';")
+    )
+    assert 1 == int(
+        node2.query("SELECT count() FROM system.databases WHERE name = 'replicateddb';")
+    )
+    assert 1 == int(
+        node2.query("SELECT count() FROM system.tables WHERE name = 'tbl2';")
+    )
+
+    node2.query("SYSTEM STOP MERGES;")
+    node2.query("SYSTEM STOP FETCHES;")
+    node2.query("SYSTEM STOP REPLICATION QUEUES;")
+
+    node1.query("INSERT INTO default.tbl SELECT * FROM system.numbers_mt LIMIT 10000;")
+    node1.query(
+        "INSERT INTO replicateddb.tbl2 SELECT * FROM system.numbers_mt LIMIT 10000;"
+    )
+
+    assert 0 == int(node2.query("SELECT count() FROM default.tbl;"))
+    assert 0 == int(node2.query("SELECT count() FROM replicateddb.tbl2;"))
+
+    node2.query("DROP TABLE IF EMPTY default.tbl ON CLUSTER 'cluster';")
+    node2.query("DROP TABLE IF EMPTY replicateddb.tbl2;")
+
+    assert 0 == int(
+        node1.query("SELECT count() FROM system.tables WHERE name = 'tbl';")
+    )
+    assert 0 == int(
+        node2.query("SELECT count() FROM system.tables WHERE name = 'tbl';")
+    )
+    assert 0 == int(
+        node1.query("SELECT count() FROM system.tables WHERE name = 'tbl2';")
+    )
+    assert 0 == int(
+        node2.query("SELECT count() FROM system.tables WHERE name = 'tbl2';")
+    )
+
+    with pytest.raises(
+        QueryRuntimeException,
+        match="DB::Exception: DROP IF EMPTY is not implemented for databases.",
+    ):
+        node2.query("DROP DATABASE IF EMPTY replicateddb;")
diff --git a/tests/queries/0_stateless/02716_drop_if_empty.reference b/tests/queries/0_stateless/02716_drop_if_empty.reference
new file mode 100644
index 00000000000..6ddd9b205af
--- /dev/null
+++ b/tests/queries/0_stateless/02716_drop_if_empty.reference
@@ -0,0 +1,10 @@
+-- { echoOn }
+DROP TABLE IF EMPTY data_02716_2;
+DROP TABLE IF EMPTY data_02716_1; -- { serverError TABLE_NOT_EMPTY }
+TRUNCATE TABLE data_02716_1;
+DROP TABLE IF EMPTY data_02716_1;
+DROP DATABASE IF EMPTY {CLICKHOUSE_DATABASE_1:Identifier}; -- { serverError NOT_IMPLEMENTED }
+SELECT count() FROM system.tables WHERE database = {CLICKHOUSE_DATABASE_1:String};
+1
+SELECT count() FROM system.tables WHERE database = 'default' AND name IN ('data_02716_1', 'data_02716_2');
+0
diff --git a/tests/queries/0_stateless/02716_drop_if_empty.sql b/tests/queries/0_stateless/02716_drop_if_empty.sql
new file mode 100644
index 00000000000..6cea90a6d5c
--- /dev/null
+++ b/tests/queries/0_stateless/02716_drop_if_empty.sql
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS data_02716_1;
+DROP TABLE IF EXISTS data_02716_2;
+DROP TABLE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}.data_02716_3;
+DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier};
+
+CREATE TABLE data_02716_1 (v UInt64) ENGINE = MergeTree ORDER BY v;
+CREATE TABLE data_02716_2 (v UInt64) ENGINE = MergeTree ORDER BY v;
+
+CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier};
+CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.data_02716_3 (v UInt64) ENGINE = MergeTree ORDER BY v;
+
+INSERT INTO data_02716_1 SELECT * FROM system.numbers LIMIT 1;
+
+-- { echoOn }
+DROP TABLE IF EMPTY data_02716_2;
+DROP TABLE IF EMPTY data_02716_1; -- { serverError TABLE_NOT_EMPTY }
+TRUNCATE TABLE data_02716_1;
+DROP TABLE IF EMPTY data_02716_1;
+DROP DATABASE IF EMPTY {CLICKHOUSE_DATABASE_1:Identifier}; -- { serverError NOT_IMPLEMENTED }
+
+SELECT count() FROM system.tables WHERE database = {CLICKHOUSE_DATABASE_1:String};
+SELECT count() FROM system.tables WHERE database = 'default' AND name IN ('data_02716_1', 'data_02716_2');

From 8610baabefbe0e90fa861ea9e86c568bad09879b Mon Sep 17 00:00:00 2001
From: Igor Nikonov <954088+devcrafter@users.noreply.github.com>
Date: Thu, 21 Sep 2023 15:57:47 +0200
Subject: [PATCH 207/243] Revert "Avoid excessive calls to getifaddrs in
 isLocalAddress"

---
 src/Common/isLocalAddress.cpp | 43 +++--------------------------------
 1 file changed, 3 insertions(+), 40 deletions(-)

diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp
index 902505404a6..7569c6fc14e 100644
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@@ -1,14 +1,9 @@
 #include <Common/isLocalAddress.h>
 
 #include <ifaddrs.h>
-#include <chrono>
 #include <cstring>
-#include <memory>
-#include <mutex>
-#include <shared_mutex>
 #include <optional>
 #include <base/types.h>
-#include <boost/core/noncopyable.hpp>
 #include <Common/Exception.h>
 #include <Poco/Net/IPAddress.h>
 #include <Poco/Net/SocketAddress.h>
@@ -25,7 +20,7 @@ namespace ErrorCodes
 namespace
 {
 
-struct NetworkInterfaces : public boost::noncopyable
+struct NetworkInterfaces
 {
     ifaddrs * ifaddr;
     NetworkInterfaces()
@@ -36,13 +31,6 @@ struct NetworkInterfaces : public boost::noncopyable
         }
     }
 
-    void swap(NetworkInterfaces && other)
-    {
-        auto * tmp = ifaddr;
-        ifaddr = other.ifaddr;
-        other.ifaddr = tmp;
-    }
-
     bool hasAddress(const Poco::Net::IPAddress & address) const
     {
         ifaddrs * iface;
@@ -86,32 +74,6 @@ struct NetworkInterfaces : public boost::noncopyable
     {
         freeifaddrs(ifaddr);
     }
-
-    static const NetworkInterfaces & instance()
-    {
-        static constexpr int NET_INTERFACE_VALID_PERIOD_MS = 30000;
-        static NetworkInterfaces nf;
-        static std::atomic<std::chrono::steady_clock::time_point> last_updated_time = std::chrono::steady_clock::now();
-        static std::shared_mutex nf_mtx;
-
-        auto now = std::chrono::steady_clock::now();
-        auto last_updated_time_snapshot = last_updated_time.load();
-
-        if (std::chrono::duration_cast<std::chrono::milliseconds>(now - last_updated_time_snapshot).count() > NET_INTERFACE_VALID_PERIOD_MS)
-        {
-            std::unique_lock lock(nf_mtx);
-            if (last_updated_time.load() != last_updated_time_snapshot) /// it's possible that last_updated_time after we get the snapshot
-                return nf;
-            nf.swap(NetworkInterfaces());
-            last_updated_time.store(now);
-            return nf;
-        }
-        else
-        {
-            std::shared_lock lock(nf_mtx);
-            return nf;
-        }
-    }
 };
 
 }
@@ -149,7 +111,8 @@ bool isLocalAddress(const Poco::Net::IPAddress & address)
         }
     }
 
-    return NetworkInterfaces::instance().hasAddress(address);
+    NetworkInterfaces interfaces;
+    return interfaces.hasAddress(address);
 }
 
 

From 2b8a263355f79e10619ad209e3efcfebedae5d58 Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Thu, 21 Sep 2023 16:11:08 +0200
Subject: [PATCH 208/243] Change referential_constraints to an empty table

---
 .../referential_constraints.sql               | 28 ++-----------------
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql
index 9722db92050..7683865a3c9 100644
--- a/src/Storages/System/InformationSchema/referential_constraints.sql
+++ b/src/Storages/System/InformationSchema/referential_constraints.sql
@@ -1,4 +1,4 @@
-ATTACH VIEW referential_constraints
+CREATE TABLE referential_constraints
     (
      `constraint_catalog` String,
      `constraint_schema` String,
@@ -22,28 +22,4 @@ ATTACH VIEW referential_constraints
      `DELETE_RULE` String,
      `TABLE_NAME` String,
      `REFERENCED_TABLE_NAME` String
-) AS
-SELECT 
-    ''                        AS constraint_catalog,
-    NULL                      AS constraint_name,
-    ''                        AS constraint_schema,
-    ''                        AS unique_constraint_catalog,
-    NULL                      AS unique_constraint_name,
-    ''                        AS unique_constraint_schema,
-    ''                        AS match_option,
-    ''                        AS update_rule,
-    ''                        AS delete_rule,
-    ''                        AS table_name,
-    ''                        AS referenced_table_name,
-    constraint_catalog        AS CONSTRAINT_CATALOG,
-    constraint_name           AS CONSTRAINT_NAME,
-    constraint_schema         AS CONSTRAINT_SCHEMA,
-    unique_constraint_catalog AS UNIQUE_CONSTRAINT_CATALOG,
-    unique_constraint_name    AS UNIQUE_CONSTRAINT_NAME,
-    unique_constraint_schema  AS UNIQUE_CONSTRAINT_SCHEMA,
-    match_option              AS MATCH_OPTION,
-    update_rule               AS UPDATE_RULE,
-    delete_rule               AS DELETE_RULE,
-    table_name                AS TABLE_NAME,
-    referenced_table_name     AS REFERENCED_TABLE_NAME
-WHERE false; -- make sure this view is always empty
+) ENGINE Memory;

From 4d3b127a1ff370fd5cee3dccdc2456163ad816ed Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Thu, 21 Sep 2023 16:16:03 +0200
Subject: [PATCH 209/243] clang-format to resolve style issues

---
 src/Functions/DateTimeTransforms.h    |  3 ++-
 src/Functions/toDaysSinceYearZero.cpp | 38 ++++++++++++++-------------
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index 986206429e7..d74eefc70d8 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -928,7 +928,8 @@ struct ToDayOfYearImpl
 struct ToDaysSinceYearZeroImpl
 {
 private:
-    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1970 = 719'528;
+    /// Constant is taken from Java LocalDate implementation
+    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1970 = 719'528; /// 01 January, each
 
 public:
     static constexpr auto name = "toDaysSinceYearZero";
diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp
index 7aa04fca740..abc2b73f31d 100644
--- a/src/Functions/toDaysSinceYearZero.cpp
+++ b/src/Functions/toDaysSinceYearZero.cpp
@@ -15,7 +15,7 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }
 
 namespace
@@ -26,11 +26,12 @@ class FunctionToDaysSinceYearZero : public IFunction
 {
     using ResultType = DataTypeUInt32;
     using Transformer = TransformDateTime64<ToDaysSinceYearZeroImpl>;
+
 public:
     static constexpr auto name = "toDaysSinceYearZero";
     static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionToDaysSinceYearZero>(context); }
 
-    explicit FunctionToDaysSinceYearZero(ContextPtr /*context*/) {}
+    explicit FunctionToDaysSinceYearZero(ContextPtr /*context*/) { }
 
     String getName() const override { return name; }
     size_t getNumberOfArguments() const override { return 1; }
@@ -41,9 +42,7 @@ public:
     {
         FunctionArgumentDescriptors mandatory_args{
             {"date",
-             [](const IDataType & dt) {
-                return isDateOrDate32<IDataType>(dt) || isDateTime<IDataType>(dt) || isDateTime64<IDataType>(dt);
-             },
+             [](const IDataType & dt) { return isDateOrDate32<IDataType>(dt) || isDateTime<IDataType>(dt) || isDateTime64<IDataType>(dt); },
              nullptr,
              "Date, Date32, DateTime or DateTime64"}};
 
@@ -58,21 +57,27 @@ public:
         WhichDataType which(from_type);
 
         if (which.isDate())
-            return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(
+                arguments, result_type, input_rows_count);
         else if (which.isDate32())
-            return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(
+                arguments, result_type, input_rows_count);
         else if (which.isDateTime())
-            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(
+                arguments, result_type, input_rows_count);
         else if (which.isDateTime64())
         {
             const auto scale = static_cast<const DataTypeDateTime64 *>(from_type)->getScale();
             const Transformer transformer(scale);
-            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, Transformer>::execute(arguments, result_type, input_rows_count, transformer);
+            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, Transformer>::execute(
+                arguments, result_type, input_rows_count, transformer);
         }
 
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+        throw Exception(
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
             "Illegal type {} of argument of function {}",
-            arguments[0].type->getName(), this->getName());
+            arguments[0].type->getName(),
+            this->getName());
     }
 };
 
@@ -80,16 +85,13 @@ public:
 
 REGISTER_FUNCTION(ToDaysSinceYearZero)
 {
-    factory.registerFunction<FunctionToDaysSinceYearZero>(
-    FunctionDocumentation{
-    .description=R"(
+    factory.registerFunction<FunctionToDaysSinceYearZero>(FunctionDocumentation{
+        .description = R"(
 Returns for a given date, the number of days passed since 1 January 0000 in the proleptic Gregorian calendar defined by ISO 8601.
 The calculation is the same as in MySQL's TO_DAYS() function.
 )",
-    .examples{
-        {"typical", "SELECT toDaysSinceYearZero(toDate('2023-09-08'))", "713569"}},
-    .categories{"Dates and Times"}
-    });
+        .examples{{"typical", "SELECT toDaysSinceYearZero(toDate('2023-09-08'))", "713569"}},
+        .categories{"Dates and Times"}});
 
     /// MySQL compatibility alias.
     factory.registerAlias("TO_DAYS", FunctionToDaysSinceYearZero::name, FunctionFactory::CaseInsensitive);

From 78b9d00966450bbbc02fb1e4aab8ebece085a384 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev" <felixoid@clickhouse.com>
Date: Thu, 21 Sep 2023 13:14:23 +0200
Subject: [PATCH 210/243] Decrease timeout for fast tests with a commit

---
 tests/ci/fast_test_check.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py
index 281bf04a171..f56b4bc90f1 100644
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@@ -146,7 +146,21 @@ def main():
 
     run_log_path = logs_path / "run.log"
     timeout_expired = False
-    timeout = 90 * 60
+    # Do not increase this timeout
+    # https://pastila.nl/?146195b6/9bb99293535e3817a9ea82c3f0f7538d.link#5xtClOjkaPLEjSuZ92L2/g==
+    #
+    # SELECT toStartOfWeek(started_at) AS hour,
+    #   avg(completed_at - started_at) AS avg_runtime from default.workflow_jobs
+    # WHERE
+    #   conclusion = 'success' AND
+    #   name = 'FastTest'
+    # GROUP BY hour
+    # ORDER BY hour
+    #
+    # Our fast tests finish in less than 10 minutes average, and very rarely it builds
+    # longer, but the next run will reuse the sccache
+    # SO DO NOT INCREASE IT
+    timeout = 40 * 60
     with TeePopen(run_cmd, run_log_path, timeout=timeout) as process:
         retcode = process.wait()
         if process.timeout_exceeded:

From bc5f141a6cace8aa10dbfb97cc3f87c5c54f7aec Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Thu, 21 Sep 2023 16:40:38 +0200
Subject: [PATCH 211/243] fix {..} for File

---
 src/Storages/HDFS/StorageHDFS.cpp             |  9 +++---
 src/Storages/StorageFile.cpp                  | 30 +++++++++++--------
 ...ultidirectory_globs_storage_file.reference |  1 -
 ...02771_multidirectory_globs_storage_file.sh |  5 ----
 4 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 85d5fec3d24..cb734f0b961 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -120,11 +120,10 @@ namespace
         std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
         for (size_t i = 1; i < anchor_positions.size(); ++i)
         {
-            std::ostringstream oss;
-            oss << common_prefix
-                << for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
-                << common_suffix;
-            std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(path_for_ls, fs, oss.str());
+            std::string expanded_matcher = common_prefix
+                + for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
+                + common_suffix;
+            std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(path_for_ls, fs, expanded_matcher);
             ret.insert(ret.end(), result_part.begin(), result_part.end());
         }
         return ret;
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index dfb4358ef18..eb18842fdaa 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -154,11 +154,10 @@ void expandSelector(const std::string & path_for_ls,
     std::string common_suffix = for_match.substr(anchor_positions[anchor_positions.size()-1] + 1);
     for (size_t i = 1; i < anchor_positions.size(); ++i)
     {
-        std::ostringstream oss;
-        oss << common_prefix
-            << for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
-            << common_suffix;
-        listFilesWithRegexpMatchingImpl(path_for_ls, oss.str(), total_bytes_to_read, result, recursive);
+        std::string expanded_matcher = common_prefix
+            + for_match.substr(anchor_positions[i-1] + 1, (anchor_positions[i] - anchor_positions[i-1] - 1))
+            + common_suffix;
+        listFilesWithRegexpMatchingImpl(path_for_ls, expanded_matcher, total_bytes_to_read, result, recursive);
     }
 }
 
@@ -178,28 +177,33 @@ void listFilesWithRegexpMatchingImpl(
     const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
     const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
 
-    /// slashes_in_glob counter is a upper-bound estimate of recursion depth
-    /// needed to process complex cases when `/` is included into glob, e.g. /pa{th1/a,th2/b}.csv
-    bool has_curly_braces = false;
+    bool has_generator = false;
+    bool range_generator = false;
+
     const size_t next_slash_after_glob_pos = [&]()
     {
         if (!has_glob)
             return suffix_with_globs.find('/', 1);
 
+        bool prev_is_dot = false;
+
         for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
         {
             if (*it == '{')
-            {
-                has_curly_braces = true;
-                return size_t(0);
-            }
+                has_generator = true;
             else if (*it == '/')
                 return size_t(std::distance(suffix_with_globs.begin(), it));
+            else if (*it == '.')
+            {
+                if (prev_is_dot)
+                    range_generator = true;
+                prev_is_dot = true;
+            }
         }
         return std::string::npos;
     }();
 
-    if (has_curly_braces)
+    if (has_generator && !range_generator)
     {
         expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive);
         return;
diff --git a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.reference b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.reference
index b0746e0a1be..86152869872 100644
--- a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.reference
+++ b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.reference
@@ -3,4 +3,3 @@ This is file data1	data1.csv
 This is file data2	data2.csv
 This is file data1	data1.csv
 This is file data2	data2.csv
-This is file data1	data1.csv
diff --git a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh
index b0918d4641d..46ce3d97ba2 100755
--- a/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh
+++ b/tests/queries/0_stateless/02771_multidirectory_globs_storage_file.sh
@@ -30,9 +30,4 @@ ${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CL
 ${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data1.csv';"
 ${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir?/{subdir?1/data1,subdir2?/data2}.csv', CSV) WHERE _file == 'data2.csv';"
 
-# Add a directory to test against permission_denied
-rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}/dir_inaccessible/ && mkdir -p ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir_inaccessible/ && chmod 000 ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir_inaccessible/
-
-${CLICKHOUSE_CLIENT} --query "SELECT *, _file FROM file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/dir{?/subdir?1/da,2/subdir2?/da}ta1.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;"
-
 rm -rf ${user_files_path:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}

From f5137dd0b4019b927ecee50290f4db6c88c23a34 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 21 Sep 2023 10:54:09 +0000
Subject: [PATCH 212/243] More clang-tidy fixes

---
 .clang-tidy                                        | 14 ++++++++++++++
 base/pcg-random/pcg_extras.hpp                     |  2 +-
 src/Bridge/IBridge.cpp                             |  4 ++--
 src/Common/DNSResolver.cpp                         |  4 ++--
 src/Compression/tests/gtest_compressionCodec.cpp   |  4 +---
 src/Core/SettingsFields.h                          |  2 +-
 src/Daemon/BaseDaemon.cpp                          |  2 +-
 src/DataTypes/DataTypeLowCardinality.cpp           |  4 ++--
 src/Functions/GatherUtils/ends_with.cpp            |  4 ++--
 src/Functions/GatherUtils/has_all.cpp              |  4 ++--
 src/Functions/GatherUtils/has_any.cpp              |  4 ++--
 src/Functions/GatherUtils/has_substr.cpp           |  4 ++--
 src/Functions/GatherUtils/starts_with.cpp          |  4 ++--
 src/Functions/UTCTimestampTransform.cpp            |  4 ++--
 src/IO/parseDateTimeBestEffort.cpp                 |  2 +-
 src/Interpreters/inplaceBlockConversions.cpp       |  2 +-
 src/Processors/Formats/Impl/Parquet/Write.cpp      | 10 +++++-----
 src/Storages/AlterCommands.cpp                     |  2 +-
 src/Storages/Kafka/KafkaProducer.cpp               |  4 ++--
 .../MergeTree/MergeTreePrefetchedReadPool.cpp      |  2 +-
 src/Storages/StorageMerge.cpp                      |  2 +-
 utils/keeper-bench/Runner.cpp                      |  2 +-
 22 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/.clang-tidy b/.clang-tidy
index cfb42ebd4c7..cbeac377079 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -5,6 +5,9 @@
 # a) the new check is not controversial (this includes many checks in readability-* and google-*) or
 # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
 
+# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
+# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
+
 # TODO Let clang-tidy check headers in further directories
 #      --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
 HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$'
@@ -25,6 +28,7 @@ Checks: '*,
     -bugprone-not-null-terminated-result,
     -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
     -bugprone-unchecked-optional-access,
+    -bugprone-*, -- category temporarily disabled because some check(s) in it are slow
 
     -cert-dcl16-c,
     -cert-dcl37-c,
@@ -39,6 +43,7 @@ Checks: '*,
     -clang-analyzer-optin.portability.UnixAPI,
     -clang-analyzer-security.insecureAPI.bzero,
     -clang-analyzer-security.insecureAPI.strcpy,
+    -clang-analyzer-*, -- category temporarily disabled because some check(s) in it are slow
 
     -cppcoreguidelines-avoid-c-arrays,
     -cppcoreguidelines-avoid-const-or-ref-data-members,
@@ -67,6 +72,7 @@ Checks: '*,
     -cppcoreguidelines-pro-type-vararg,
     -cppcoreguidelines-slicing,
     -cppcoreguidelines-special-member-functions,
+    -cppcoreguidelines-*, -- category temporarily disabled because some check(s) in it are slow
 
     -darwin-*,
 
@@ -128,10 +134,12 @@ Checks: '*,
 
     -performance-inefficient-string-concatenation,
     -performance-no-int-to-ptr,
+    -performance-avoid-endl,
     -performance-unnecessary-value-param,
 
     -portability-simd-intrinsics,
 
+    -readability-avoid-unconditional-preprocessor-if,
     -readability-braces-around-statements,
     -readability-convert-member-functions-to-static,
     -readability-else-after-return,
@@ -155,6 +163,12 @@ Checks: '*,
 
 WarningsAsErrors: '*'
 
+ExtraArgs:
+# clang-tidy 17 started to complain (for unknown reasons) that various pragmas are unknown ("clang-diagnostic-unknown-pragmas").
+# This is technically a compiler error, not a clang-tidy error. We could litter the code base with more pragmas that suppress
+# this error but it is better to pass the following flag to the compiler:
+- '-Wno-unknown-pragmas'
+
 CheckOptions:
   readability-identifier-naming.ClassCase: CamelCase
   readability-identifier-naming.EnumCase: CamelCase
diff --git a/base/pcg-random/pcg_extras.hpp b/base/pcg-random/pcg_extras.hpp
index cc11d907006..32dc5c318c3 100644
--- a/base/pcg-random/pcg_extras.hpp
+++ b/base/pcg-random/pcg_extras.hpp
@@ -463,7 +463,7 @@ auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
 }
 
 template <typename Iter, typename RandType>
-void shuffle(Iter from, Iter to, RandType&& rng)
+void shuffle(Iter from, Iter to, RandType&& rng) // NOLINT(cppcoreguidelines-missing-std-forward)
 {
     typedef typename std::iterator_traits<Iter>::difference_type delta_t;
     typedef typename std::remove_reference<RandType>::type::result_type result_t;
diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp
index c913a6e80c0..87a5a0c2d52 100644
--- a/src/Bridge/IBridge.cpp
+++ b/src/Bridge/IBridge.cpp
@@ -140,7 +140,7 @@ void IBridge::initialize(Application & self)
             throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path);
 
         /// Disable buffering for stdout.
-        setbuf(stdout, nullptr);
+        setbuf(stdout, nullptr); // NOLINT(cert-msc24-c,cert-msc33-c)
     }
     const auto stderr_path = config().getString("logger.stderr", "");
     if (!stderr_path.empty())
@@ -149,7 +149,7 @@ void IBridge::initialize(Application & self)
             throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path);
 
         /// Disable buffering for stderr.
-        setbuf(stderr, nullptr);
+        setbuf(stderr, nullptr); // NOLINT(cert-msc24-c,cert-msc33-c)
     }
 
     buildLoggers(config(), logger(), self.commandName());
diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp
index 6a685b602ae..9cb352da0ba 100644
--- a/src/Common/DNSResolver.cpp
+++ b/src/Common/DNSResolver.cpp
@@ -310,8 +310,8 @@ static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr
 
 template <typename UpdateF, typename ElemsT>
 bool DNSResolver::updateCacheImpl(
-    UpdateF && update_func,
-    ElemsT && elems,
+    UpdateF && update_func, // NOLINT(cppcoreguidelines-missing-std-forward)
+    ElemsT && elems, // NOLINT(cppcoreguidelines-missing-std-forward)
     UInt32 max_consecutive_failures,
     FormatStringHelper<String> notfound_log_msg,
     FormatStringHelper<String> dropped_log_msg)
diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp
index fa89e2da645..24f16a55c25 100644
--- a/src/Compression/tests/gtest_compressionCodec.cpp
+++ b/src/Compression/tests/gtest_compressionCodec.cpp
@@ -36,9 +36,7 @@ using namespace DB;
 namespace
 {
 
-template <class T> using is_pod = std::is_trivial<std::is_standard_layout<T>>;
-template <class T> inline constexpr bool is_pod_v = is_pod<T>::value;
-
+template <class T> inline constexpr bool is_pod_v = std::is_trivial_v<std::is_standard_layout<T>>;
 
 template <typename T>
 struct AsHexStringHelper
diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h
index 99f280d3641..d796818e9cb 100644
--- a/src/Core/SettingsFields.h
+++ b/src/Core/SettingsFields.h
@@ -429,7 +429,7 @@ constexpr auto getEnumValues()
         if (it != map.end()) \
             return it->second; \
         throw Exception(ERROR_CODE_FOR_UNEXPECTED_NAME, \
-            "Unexpected value of " #NEW_NAME ":{}", std::to_string(std::underlying_type<EnumType>::type(value))); \
+            "Unexpected value of " #NEW_NAME ":{}", std::to_string(std::underlying_type_t<EnumType>(value))); \
     } \
     \
     typename SettingField##NEW_NAME::EnumType SettingField##NEW_NAME##Traits::fromString(std::string_view str) \
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index be323dc6786..84e778ab185 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -843,7 +843,7 @@ void BaseDaemon::initialize(Application & self)
             throw Poco::OpenFileException("Cannot attach stderr to " + stderr_path);
 
         /// Disable buffering for stderr
-        setbuf(stderr, nullptr);
+        setbuf(stderr, nullptr); // NOLINT(cert-msc24-c,cert-msc33-c)
     }
 
     if ((!log_path.empty() && is_daemon) || config().has("logger.stdout"))
diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp
index 8293455cabc..079d9d82cd7 100644
--- a/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/src/DataTypes/DataTypeLowCardinality.cpp
@@ -111,7 +111,7 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataTyp
 {
     auto creator = [&](auto x)
     {
-        using ColumnType = typename std::remove_pointer<decltype(x)>::type;
+        using ColumnType = typename std::remove_pointer_t<decltype(x)>;
         return ColumnUnique<ColumnType>::create(keys_type);
     };
     return createColumnUniqueImpl(keys_type, creator);
@@ -121,7 +121,7 @@ MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataTyp
 {
     auto creator = [&](auto x)
     {
-        using ColumnType = typename std::remove_pointer<decltype(x)>::type;
+        using ColumnType = typename std::remove_pointer_t<decltype(x)>;
         return ColumnUnique<ColumnType>::create(std::move(keys), keys_type.isNullable());
     };
     return createColumnUniqueImpl(keys_type, creator);
diff --git a/src/Functions/GatherUtils/ends_with.cpp b/src/Functions/GatherUtils/ends_with.cpp
index 579d903005a..0d9537f88df 100644
--- a/src/Functions/GatherUtils/ends_with.cpp
+++ b/src/Functions/GatherUtils/ends_with.cpp
@@ -15,7 +15,7 @@ struct ArrayEndsWithSelectArraySourcePair : public ArraySourcePairSelector<Array
                              bool is_second_const, bool is_second_nullable, SecondSource && second,
                              ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<SecondSource>::type;
+        using SourceType = typename std::decay_t<SecondSource>;
 
         if (is_second_nullable)
         {
@@ -40,7 +40,7 @@ struct ArrayEndsWithSelectArraySourcePair : public ArraySourcePairSelector<Array
                                  bool is_second_const, bool is_second_nullable, SecondSource && second,
                                  ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<FirstSource>::type;
+        using SourceType = typename std::decay_t<FirstSource>;
 
         if (is_first_nullable)
         {
diff --git a/src/Functions/GatherUtils/has_all.cpp b/src/Functions/GatherUtils/has_all.cpp
index 6e34a851c02..c0be9c9273a 100644
--- a/src/Functions/GatherUtils/has_all.cpp
+++ b/src/Functions/GatherUtils/has_all.cpp
@@ -15,7 +15,7 @@ struct ArrayHasAllSelectArraySourcePair : public ArraySourcePairSelector<ArrayHa
                              bool is_second_const, bool is_second_nullable, SecondSource && second,
                              ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<SecondSource>::type;
+        using SourceType = typename std::decay_t<SecondSource>;
 
         if (is_second_nullable)
         {
@@ -40,7 +40,7 @@ struct ArrayHasAllSelectArraySourcePair : public ArraySourcePairSelector<ArrayHa
                                  bool is_second_const, bool is_second_nullable, SecondSource && second,
                                  ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<FirstSource>::type;
+        using SourceType = typename std::decay_t<FirstSource>;
 
         if (is_first_nullable)
         {
diff --git a/src/Functions/GatherUtils/has_any.cpp b/src/Functions/GatherUtils/has_any.cpp
index b7a8c9f620d..fa8df466096 100644
--- a/src/Functions/GatherUtils/has_any.cpp
+++ b/src/Functions/GatherUtils/has_any.cpp
@@ -15,7 +15,7 @@ struct ArrayHasAnySelectArraySourcePair : public ArraySourcePairSelector<ArrayHa
                              bool is_second_const, bool is_second_nullable, SecondSource && second,
                              ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<SecondSource>::type;
+        using SourceType = typename std::decay_t<SecondSource>;
 
         if (is_second_nullable)
         {
@@ -40,7 +40,7 @@ struct ArrayHasAnySelectArraySourcePair : public ArraySourcePairSelector<ArrayHa
                                  bool is_second_const, bool is_second_nullable, SecondSource && second,
                                  ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<FirstSource>::type;
+        using SourceType = typename std::decay_t<FirstSource>;
 
         if (is_first_nullable)
         {
diff --git a/src/Functions/GatherUtils/has_substr.cpp b/src/Functions/GatherUtils/has_substr.cpp
index 244a1d21633..0d579a8460a 100644
--- a/src/Functions/GatherUtils/has_substr.cpp
+++ b/src/Functions/GatherUtils/has_substr.cpp
@@ -15,7 +15,7 @@ struct ArrayHasSubstrSelectArraySourcePair : public ArraySourcePairSelector<Arra
                              bool is_second_const, bool is_second_nullable, SecondSource && second,
                              ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<SecondSource>::type;
+        using SourceType = typename std::decay_t<SecondSource>;
 
         if (is_second_nullable)
         {
@@ -40,7 +40,7 @@ struct ArrayHasSubstrSelectArraySourcePair : public ArraySourcePairSelector<Arra
                                  bool is_second_const, bool is_second_nullable, SecondSource && second,
                                  ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<FirstSource>::type;
+        using SourceType = typename std::decay_t<FirstSource>;
 
         if (is_first_nullable)
         {
diff --git a/src/Functions/GatherUtils/starts_with.cpp b/src/Functions/GatherUtils/starts_with.cpp
index 813294bc092..790e28f9528 100644
--- a/src/Functions/GatherUtils/starts_with.cpp
+++ b/src/Functions/GatherUtils/starts_with.cpp
@@ -15,7 +15,7 @@ struct ArrayStartsWithSelectArraySourcePair : public ArraySourcePairSelector<Arr
                              bool is_second_const, bool is_second_nullable, SecondSource && second,
                              ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<SecondSource>::type;
+        using SourceType = typename std::decay_t<SecondSource>;
 
         if (is_second_nullable)
         {
@@ -40,7 +40,7 @@ struct ArrayStartsWithSelectArraySourcePair : public ArraySourcePairSelector<Arr
                                  bool is_second_const, bool is_second_nullable, SecondSource && second,
                                  ColumnUInt8 & result)
     {
-        using SourceType = typename std::decay<FirstSource>::type;
+        using SourceType = typename std::decay_t<FirstSource>;
 
         if (is_first_nullable)
         {
diff --git a/src/Functions/UTCTimestampTransform.cpp b/src/Functions/UTCTimestampTransform.cpp
index ff3c9c27ffc..77cc1826941 100644
--- a/src/Functions/UTCTimestampTransform.cpp
+++ b/src/Functions/UTCTimestampTransform.cpp
@@ -71,8 +71,8 @@ namespace
         {
             if (arguments.size() != 2)
                 throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {}'s arguments number must be 2.", name);
-            ColumnWithTypeAndName arg1 = arguments[0];
-            ColumnWithTypeAndName arg2 = arguments[1];
+            const ColumnWithTypeAndName & arg1 = arguments[0];
+            const ColumnWithTypeAndName & arg2 = arguments[1];
             const auto * time_zone_const_col = checkAndGetColumnConstData<ColumnString>(arg2.column.get());
             if (!time_zone_const_col)
                 throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of 2nd argument of function {}. Excepted const(String).", arg2.column->getName(), name);
diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp
index 6bdba251c36..83fde8e8830 100644
--- a/src/IO/parseDateTimeBestEffort.cpp
+++ b/src/IO/parseDateTimeBestEffort.cpp
@@ -434,7 +434,7 @@ ReturnType parseDateTimeBestEffortImpl(
                 num_digits = readDigits(digits, sizeof(digits), in);
                 if (fractional)
                 {
-                    using FractionalType = typename std::decay<decltype(fractional->value)>::type;
+                    using FractionalType = typename std::decay_t<decltype(fractional->value)>;
                     // Reading more decimal digits than fits into FractionalType would case an
                     // overflow, so it is better to skip all digits from the right side that do not
                     // fit into result type. To provide less precise value rather than bogus one.
diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp
index 4cac2f0e20c..f3a65154524 100644
--- a/src/Interpreters/inplaceBlockConversions.cpp
+++ b/src/Interpreters/inplaceBlockConversions.cpp
@@ -103,7 +103,7 @@ void addDefaultRequiredExpressionsRecursively(
         /// and this identifier will be in required columns. If such column is not in ColumnsDescription we ignore it.
 
         /// This column is required, but doesn't have default expression, so lets use "default default"
-        auto column = columns.get(required_column_name);
+        const auto & column = columns.get(required_column_name);
         auto default_value = column.type->getDefault();
         ASTPtr expr = std::make_shared<ASTLiteral>(default_value);
         if (is_column_in_query && convert_null_to_default)
diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp
index d9cfd40e168..978787d07cd 100644
--- a/src/Processors/Formats/Impl/Parquet/Write.cpp
+++ b/src/Processors/Formats/Impl/Parquet/Write.cpp
@@ -229,10 +229,10 @@ struct StatisticsStringRef
 /// The Coverter* structs below are responsible for that.
 /// When conversion is not needed, getBatch() will just return pointer into original data.
 
-template <typename Col, typename To, typename MinMaxType = typename std::conditional<
-        std::is_signed<typename Col::Container::value_type>::value,
+template <typename Col, typename To, typename MinMaxType = typename std::conditional_t<
+        std::is_signed_v<typename Col::Container::value_type>,
         To,
-        typename std::make_unsigned<To>::type>::type>
+        typename std::make_unsigned_t<To>>>
 struct ConverterNumeric
 {
     using Statistics = StatisticsNumeric<MinMaxType, To>;
@@ -517,14 +517,14 @@ void writeColumnImpl(
     bool use_dictionary = options.use_dictionary_encoding && !s.is_bool;
 
     std::optional<parquet::ColumnDescriptor> fixed_string_descr;
-    if constexpr (std::is_same<ParquetDType, parquet::FLBAType>::value)
+    if constexpr (std::is_same_v<ParquetDType, parquet::FLBAType>)
     {
         /// This just communicates one number to MakeTypedEncoder(): the fixed string length.
         fixed_string_descr.emplace(parquet::schema::PrimitiveNode::Make(
             "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY,
             parquet::ConvertedType::NONE, static_cast<int>(converter.fixedStringSize())), 0, 0);
 
-        if constexpr (std::is_same<typename Converter::Statistics, StatisticsFixedStringRef>::value)
+        if constexpr (std::is_same_v<typename Converter::Statistics, StatisticsFixedStringRef>)
             page_statistics.fixed_string_size = converter.fixedStringSize();
     }
 
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index f38fc1f3734..b99a712c672 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -1154,7 +1154,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
                         "Column {} doesn't have MATERIALIZED, cannot remove it",
                         backQuote(column_name));
 
-                auto column_from_table = all_columns.get(column_name);
+                const auto & column_from_table = all_columns.get(column_name);
                 if (command.to_remove == AlterCommand::RemoveProperty::TTL && column_from_table.ttl == nullptr)
                     throw Exception(
                         ErrorCodes::BAD_ARGUMENTS,
diff --git a/src/Storages/Kafka/KafkaProducer.cpp b/src/Storages/Kafka/KafkaProducer.cpp
index c36f9b48315..edbfc76ef93 100644
--- a/src/Storages/Kafka/KafkaProducer.cpp
+++ b/src/Storages/Kafka/KafkaProducer.cpp
@@ -27,7 +27,7 @@ KafkaProducer::KafkaProducer(
     if (header.has("_key"))
     {
         auto column_index = header.getPositionByName("_key");
-        auto column_info = header.getByPosition(column_index);
+        const auto & column_info = header.getByPosition(column_index);
         if (isString(column_info.type))
             key_column_index = column_index;
         // else ? (not sure it's a good place to report smth to user)
@@ -36,7 +36,7 @@ KafkaProducer::KafkaProducer(
     if (header.has("_timestamp"))
     {
         auto column_index = header.getPositionByName("_timestamp");
-        auto column_info = header.getByPosition(column_index);
+        const auto & column_info = header.getByPosition(column_index);
         if (isDateTime(column_info.type))
             timestamp_column_index = column_index;
     }
diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
index 75f2fd26600..3716ee08bed 100644
--- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
+++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
@@ -95,7 +95,7 @@ MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
         context_)
     , WithContext(context_)
     , prefetch_threadpool(getContext()->getPrefetchThreadpool())
-    , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_.empty() ? "" : parts_.front().data_part->storage.getStorageID().getNameForLogs()) + ")"))
+    , log(&Poco::Logger::get("MergeTreePrefetchedReadPool(" + (parts_ranges.empty() ? "" : parts_ranges.front().data_part->storage.getStorageID().getNameForLogs()) + ")"))
 {
     /// Tasks creation might also create a lost of readers - check they do not
     /// do any time consuming operations in ctor.
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 272f35303bd..c14abfc9ab2 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -490,7 +490,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
                         replaceAliasColumnsInQuery(column_expr, storage_metadata_snapshot->getColumns(),
                                                 syntax_result->array_join_result_to_source, context);
 
-                        auto column_description = storage_columns.get(column);
+                        const auto & column_description = storage_columns.get(column);
                         column_expr = addTypeConversionToAST(std::move(column_expr), column_description.type->getName(),
                                                             storage_metadata_snapshot->getColumns().getAll(), context);
                         column_expr = setAlias(column_expr, column);
diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp
index f86d2b44dd7..12f4c87c5ae 100644
--- a/utils/keeper-bench/Runner.cpp
+++ b/utils/keeper-bench/Runner.cpp
@@ -336,7 +336,7 @@ void Runner::runBenchmark()
         for (size_t i = 0; i < concurrency; ++i)
         {
             auto thread_connections = connections;
-            pool->scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); });
+            pool->scheduleOrThrowOnError([this, connections_ = std::move(thread_connections)]() mutable { thread(connections_); });
         }
     }
     catch (...)

From e2b40749a612e7237c7c94c0bf21e2b10b9fe2bb Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Thu, 21 Sep 2023 16:45:47 +0200
Subject: [PATCH 213/243] the same for HDFS

---
 src/Storages/HDFS/StorageHDFS.cpp | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index cb734f0b961..8563a777bb2 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -144,26 +144,33 @@ namespace
         const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
         const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
 
-        bool has_curly_braces = false;
+        bool has_generator = false;
+        bool range_generator = false;
+
         const size_t next_slash_after_glob_pos = [&]()
         {
             if (!has_glob)
                 return suffix_with_globs.find('/', 1);
 
+            bool prev_is_dot = false;
+
             for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
-        {
+            {
                 if (*it == '{')
-                {
-                    has_curly_braces = true;
-                    return size_t(0);
-                }
+                    has_generator = true;
                 else if (*it == '/')
                     return size_t(std::distance(suffix_with_globs.begin(), it));
-        }
+                else if (*it == '.')
+                {
+                    if (prev_is_dot)
+                        range_generator = true;
+                    prev_is_dot = true;
+                }
+            }
             return std::string::npos;
         }();
 
-        if (has_curly_braces)
+        if (has_generator && !range_generator)
             return expandSelector(path_for_ls, fs, for_match);
 
         const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);

From 973cd5e972192a2139c20191bf26c75c8a03183f Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Thu, 21 Sep 2023 22:46:16 +0800
Subject: [PATCH 214/243] Always allow nullable keys for projections

---
 src/Storages/MergeTree/MergeTreeData.cpp           |  9 +++++----
 src/Storages/MergeTree/MergeTreeData.h             | 14 ++++++++++++--
 .../01710_projection_with_nullable_keys.reference  |  1 +
 .../01710_projection_with_nullable_keys.sql        |  9 +++++++++
 4 files changed, 27 insertions(+), 6 deletions(-)
 create mode 100644 tests/queries/0_stateless/01710_projection_with_nullable_keys.reference
 create mode 100644 tests/queries/0_stateless/01710_projection_with_nullable_keys.sql

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 14c9961f6c3..1612eca217c 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -481,6 +481,7 @@ void MergeTreeData::checkProperties(
     const StorageInMemoryMetadata & old_metadata,
     bool attach,
     bool allow_empty_sorting_key,
+    bool allow_nullable_key_,
     ContextPtr local_context) const
 {
     if (!new_metadata.sorting_key.definition_ast && !allow_empty_sorting_key)
@@ -598,12 +599,12 @@ void MergeTreeData::checkProperties(
 
             /// We cannot alter a projection so far. So here we do not try to find a projection in old metadata.
             bool is_aggregate = projection.type == ProjectionDescription::Type::Aggregate;
-            checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, local_context);
+            checkProperties(*projection.metadata, *projection.metadata, attach, is_aggregate, true /* allow_nullable_key */, local_context);
             projections_names.insert(projection.name);
         }
     }
 
-    checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key);
+    checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting", allow_nullable_key_);
 }
 
 void MergeTreeData::setProperties(
@@ -612,7 +613,7 @@ void MergeTreeData::setProperties(
     bool attach,
     ContextPtr local_context)
 {
-    checkProperties(new_metadata, old_metadata, attach, false, local_context);
+    checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context);
     setInMemoryMetadata(new_metadata);
 }
 
@@ -3350,7 +3351,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
         }
     }
 
-    checkProperties(new_metadata, old_metadata, false, false, local_context);
+    checkProperties(new_metadata, old_metadata, false, false, allow_nullable_key, local_context);
     checkTTLExpressions(new_metadata, old_metadata);
 
     if (!columns_to_check_conversion.empty())
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index 6f9779bde00..54b13b0b93d 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -1246,9 +1246,19 @@ protected:
     /// The same for clearOldTemporaryDirectories.
     std::mutex clear_old_temporary_directories_mutex;
 
-    void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach, bool allow_empty_sorting_key, ContextPtr local_context) const;
+    void checkProperties(
+        const StorageInMemoryMetadata & new_metadata,
+        const StorageInMemoryMetadata & old_metadata,
+        bool attach,
+        bool allow_empty_sorting_key,
+        bool allow_nullable_key_,
+        ContextPtr local_context) const;
 
-    void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false, ContextPtr local_context = nullptr);
+    void setProperties(
+        const StorageInMemoryMetadata & new_metadata,
+        const StorageInMemoryMetadata & old_metadata,
+        bool attach = false,
+        ContextPtr local_context = nullptr);
 
     void checkPartitionKeyAndInitMinMax(const KeyDescription & new_partition_key);
 
diff --git a/tests/queries/0_stateless/01710_projection_with_nullable_keys.reference b/tests/queries/0_stateless/01710_projection_with_nullable_keys.reference
new file mode 100644
index 00000000000..d1d9e99b4ed
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_with_nullable_keys.reference
@@ -0,0 +1 @@
+CREATE TABLE default.sales\n(\n    `DATE_SOLD` DateTime64(3, \'UTC\'),\n    `PRODUCT_ID` Nullable(String),\n    PROJECTION test\n    (\n        SELECT toInt64(count(*))\n        GROUP BY \n            PRODUCT_ID,\n            DATE_SOLD\n    )\n)\nENGINE = MergeTree\nPARTITION BY toYYYYMM(DATE_SOLD)\nORDER BY DATE_SOLD\nSETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/01710_projection_with_nullable_keys.sql b/tests/queries/0_stateless/01710_projection_with_nullable_keys.sql
new file mode 100644
index 00000000000..72757a1d789
--- /dev/null
+++ b/tests/queries/0_stateless/01710_projection_with_nullable_keys.sql
@@ -0,0 +1,9 @@
+DROP TABLE IF EXISTS sales;
+
+CREATE TABLE sales (DATE_SOLD DateTime64(3, 'UTC'), PRODUCT_ID Nullable(String)) Engine MergeTree() PARTITION BY toYYYYMM(DATE_SOLD) ORDER BY DATE_SOLD;
+
+ALTER TABLE sales ADD PROJECTION test (SELECT toInt64(COUNT(*)) GROUP BY PRODUCT_ID, DATE_SOLD);
+
+SHOW CREATE sales;
+
+DROP TABLE sales;

From 9af9b4a08542812694f171833a7afe08f5aaaafb Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Thu, 21 Sep 2023 09:27:20 -0700
Subject: [PATCH 215/243] Enable connection pooling for s3 table function
 (#54812)

Enable connection pooling for s3 table function
---
 src/Core/Settings.h                           |  1 +
 src/IO/HTTPCommon.h                           | 18 +++++++++-
 src/IO/ReadBufferFromS3.cpp                   | 33 +++++++++++++++++--
 .../Formats/Impl/ParquetBlockInputFormat.cpp  |  2 +-
 src/Storages/StorageS3.cpp                    |  3 +-
 5 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 94968735800..5b68b21a84f 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -105,6 +105,7 @@ class IColumn;
     M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
     M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
     M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
+    M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. Only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
     M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
     M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
     M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h
index 04ca85925af..cb7ef47dba9 100644
--- a/src/IO/HTTPCommon.h
+++ b/src/IO/HTTPCommon.h
@@ -58,6 +58,11 @@ using HTTPSessionPtr = std::shared_ptr<Poco::Net::HTTPClientSession>;
 /// All pooled sessions don't have this tag attached after being taken from a pool.
 /// If the request and the response were fully written/read, the client code should add this tag
 /// explicitly by calling `markSessionForReuse()`.
+///
+/// Note that HTTP response may contain extra bytes after the last byte of the payload. Specifically,
+/// when chunked encoding is used, there's an empty chunk at the end. Those extra bytes must also be
+/// read before the session can be reused. So we usually put an `istr->ignore(INT64_MAX)` call
+/// before `markSessionForReuse()`.
 struct HTTPSessionReuseTag
 {
 };
@@ -76,7 +81,18 @@ HTTPSessionPtr makeHTTPSession(
     Poco::Net::HTTPClientSession::ProxyConfig proxy_config = {}
 );
 
-/// As previous method creates session, but tooks it from pool, without and with proxy uri.
+/// As previous method creates session, but takes it from pool, without and with proxy uri.
+///
+/// The max_connections_per_endpoint parameter makes it look like the pool size can be different for
+/// different requests (whatever that means), but actually we just assign the endpoint's connection
+/// pool size when we see the endpoint for the first time, then we never change it.
+/// We should probably change how this configuration works, and how this pooling works in general:
+///  * Make the per_endpoint_pool_size be a global server setting instead of per-disk or per-query.
+///  * Have boolean per-disk/per-query settings for enabling/disabling pooling.
+///  * Add a limit on the number of endpoints and the total number of sessions across all endpoints.
+///  * Enable pooling by default everywhere. In particular StorageURL and StorageS3.
+///    (Enabling it for StorageURL is scary without the previous item - the user may query lots of
+///     different endpoints. So currently pooling is mainly used for S3.)
 PooledHTTPSessionPtr makePooledHTTPSession(
     const Poco::URI & uri,
     const ConnectionTimeouts & timeouts,
diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index c038523bdaa..f346b6fb3f9 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -230,10 +230,35 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons
 
         ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::ReadBufferFromS3Microseconds);
 
+        std::optional<Aws::S3::Model::GetObjectResult> result;
+        /// Connection is reusable if we've read the full response.
+        bool session_is_reusable = false;
+        SCOPE_EXIT(
+        {
+            if (!result.has_value())
+                return;
+            if (session_is_reusable)
+            {
+                auto session = getSession(*result);
+                if (!session.isNull())
+                {
+                    DB::markSessionForReuse(session);
+                    ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions);
+                }
+                else
+                    session_is_reusable = false;
+            }
+            if (!session_is_reusable)
+            {
+                resetSession(*result);
+                ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions);
+            }
+        });
+
         try
         {
-            auto result = sendRequest(range_begin, range_begin + n - 1);
-            std::istream & istr = result.GetBody();
+            result = sendRequest(range_begin, range_begin + n - 1);
+            std::istream & istr = result->GetBody();
 
             copyFromIStreamWithProgressCallback(istr, to, n, progress_callback, &bytes_copied);
 
@@ -241,6 +266,10 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, cons
 
             if (read_settings.remote_throttler)
                 read_settings.remote_throttler->add(bytes_copied, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds);
+
+            /// Read remaining bytes after the end of the payload, see HTTPSessionReuseTag.
+            istr.ignore(INT64_MAX);
+            session_is_reusable = true;
         }
         catch (Poco::Exception & e)
         {
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 8f8c909d042..c947eda42c7 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -766,7 +766,7 @@ void registerInputFormatParquet(FormatFactory & factory)
             [](ReadBuffer & buf,
                const Block & sample,
                const FormatSettings & settings,
-               const ReadSettings& read_settings,
+               const ReadSettings & read_settings,
                bool is_remote_fs,
                size_t /* max_download_threads */,
                size_t max_parsing_threads)
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 41e724109b3..ec9cf84ce4b 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1278,6 +1278,7 @@ void StorageS3::Configuration::connect(ContextPtr context)
 
     client_configuration.endpointOverride = url.endpoint;
     client_configuration.maxConnections = static_cast<unsigned>(request_settings.max_connections);
+    client_configuration.http_connection_pool_size = context->getGlobalContext()->getSettingsRef().s3_http_connection_pool_size;
     auto headers = auth_settings.headers;
     if (!headers_from_ast.empty())
         headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end());
@@ -1298,7 +1299,7 @@ void StorageS3::Configuration::connect(ContextPtr context)
             auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
             auth_settings.expiration_window_seconds.value_or(
                 context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
-                auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
+            auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
         });
 
     client_with_long_timeout = client->clone(std::nullopt, request_settings.long_request_timeout_ms);

From 5c674e553dd813ecdc5199ab2f5b97298f71b14d Mon Sep 17 00:00:00 2001
From: Sergei Trifonov <sergei@clickhouse.com>
Date: Thu, 21 Sep 2023 18:52:05 +0200
Subject: [PATCH 216/243] Docs. Fix column name and type

---
 docs/en/operations/system-tables/trace_log.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 89d54adc30d..1396244a12a 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -33,7 +33,7 @@ Columns:
     - `MemoryPeak` represents collecting updates of peak memory usage.
     - `ProfileEvent` represents collecting of increments of profile events.
 
-- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
+- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
 
 - `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
 

From be0c5123291147b26466c3d21a70ad8ddf28d2b9 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev <michael.kolupaev@clickhouse.com>
Date: Thu, 21 Sep 2023 10:14:28 -0700
Subject: [PATCH 217/243] Fix virtual columns having incorrect values after
 ORDER BY (#54811)

Fix virtual columns having incorrect values after ORDER BY
---
 src/Storages/VirtualColumnUtils.cpp                         | 6 +++---
 .../0_stateless/01825_type_json_multiple_files.reference    | 4 ++--
 .../0_stateless/02884_virtual_column_order_by.reference     | 2 ++
 tests/queries/0_stateless/02884_virtual_column_order_by.sql | 4 ++++
 4 files changed, 11 insertions(+), 5 deletions(-)
 create mode 100644 tests/queries/0_stateless/02884_virtual_column_order_by.reference
 create mode 100644 tests/queries/0_stateless/02884_virtual_column_order_by.sql

diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp
index dbb424ee957..3091a064de0 100644
--- a/src/Storages/VirtualColumnUtils.cpp
+++ b/src/Storages/VirtualColumnUtils.cpp
@@ -389,19 +389,19 @@ void addRequestedPathAndFileVirtualsToChunk(
     {
         if (virtual_column.name == "_path")
         {
-            chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), path));
+            chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), path)->convertToFullColumnIfConst());
         }
         else if (virtual_column.name == "_file")
         {
             if (filename)
             {
-                chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename));
+                chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename)->convertToFullColumnIfConst());
             }
             else
             {
                 size_t last_slash_pos = path.find_last_of('/');
                 auto filename_from_path = path.substr(last_slash_pos + 1);
-                chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path));
+                chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path)->convertToFullColumnIfConst());
             }
         }
     }
diff --git a/tests/queries/0_stateless/01825_type_json_multiple_files.reference b/tests/queries/0_stateless/01825_type_json_multiple_files.reference
index 31b10035614..b887abc8590 100644
--- a/tests/queries/0_stateless/01825_type_json_multiple_files.reference
+++ b/tests/queries/0_stateless/01825_type_json_multiple_files.reference
@@ -5,9 +5,9 @@
 {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":100,"k5":0}}
 {"data":{"k0":0,"k1":0,"k2":0,"k3":0,"k4":0,"k5":100}}
 Tuple(k0 Int8, k1 Int8, k2 Int8, k3 Int8, k4 Int8, k5 Int8)
-{"data":{"k0":0,"k1":0,"k2":100}}
-{"data":{"k0":0,"k1":100,"k2":0}}
 {"data":{"k0":100,"k1":0,"k2":0}}
+{"data":{"k0":0,"k1":100,"k2":0}}
+{"data":{"k0":0,"k1":0,"k2":100}}
 Tuple(k0 Int8, k1 Int8, k2 Int8)
 {"data":{"k1":100,"k3":0}}
 {"data":{"k1":0,"k3":100}}
diff --git a/tests/queries/0_stateless/02884_virtual_column_order_by.reference b/tests/queries/0_stateless/02884_virtual_column_order_by.reference
new file mode 100644
index 00000000000..228d40d8443
--- /dev/null
+++ b/tests/queries/0_stateless/02884_virtual_column_order_by.reference
@@ -0,0 +1,2 @@
+02884_1.csv	1
+02884_2.csv	2
diff --git a/tests/queries/0_stateless/02884_virtual_column_order_by.sql b/tests/queries/0_stateless/02884_virtual_column_order_by.sql
new file mode 100644
index 00000000000..3c73f848196
--- /dev/null
+++ b/tests/queries/0_stateless/02884_virtual_column_order_by.sql
@@ -0,0 +1,4 @@
+-- Tags: no-fasttest
+insert into function file('02884_1.csv') select 1 as x settings engine_file_truncate_on_insert=1;
+insert into function file('02884_2.csv') select 2 as x settings engine_file_truncate_on_insert=1;
+select _file, * from file('02884_{1,2}.csv') order by _file settings max_threads=1;

From a46e0deb85b9930859ce07534529fb0400f8d427 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 21 Sep 2023 17:40:16 +0000
Subject: [PATCH 218/243] Resolve bad caching of INCBIN'd external data

---
 .../System/InformationSchema/columns.sql      | 105 -----
 .../InformationSchema/key_column_usage.sql    |  54 ---
 .../referential_constraints.sql               |  25 --
 .../System/InformationSchema/schemata.sql     |  33 --
 .../System/InformationSchema/tables.sql       |  34 --
 .../System/InformationSchema/views.sql        |  46 ---
 .../System/attachInformationSchemaTables.cpp  | 371 +++++++++++++++++-
 7 files changed, 355 insertions(+), 313 deletions(-)
 delete mode 100644 src/Storages/System/InformationSchema/columns.sql
 delete mode 100644 src/Storages/System/InformationSchema/key_column_usage.sql
 delete mode 100644 src/Storages/System/InformationSchema/referential_constraints.sql
 delete mode 100644 src/Storages/System/InformationSchema/schemata.sql
 delete mode 100644 src/Storages/System/InformationSchema/tables.sql
 delete mode 100644 src/Storages/System/InformationSchema/views.sql

diff --git a/src/Storages/System/InformationSchema/columns.sql b/src/Storages/System/InformationSchema/columns.sql
deleted file mode 100644
index f95899d1cbf..00000000000
--- a/src/Storages/System/InformationSchema/columns.sql
+++ /dev/null
@@ -1,105 +0,0 @@
-ATTACH VIEW columns
-(
-    `table_catalog` String,
-    `table_schema` String,
-    `table_name` String,
-    `column_name` String,
-    `ordinal_position` UInt64,
-    `column_default` String,
-    `is_nullable` String,
-    `data_type` String,
-    `character_maximum_length` Nullable(UInt64),
-    `character_octet_length` Nullable(UInt64),
-    `numeric_precision` Nullable(UInt64),
-    `numeric_precision_radix` Nullable(UInt64),
-    `numeric_scale` Nullable(UInt64),
-    `datetime_precision` Nullable(UInt64),
-    `character_set_catalog` Nullable(String),
-    `character_set_schema` Nullable(String),
-    `character_set_name` Nullable(String),
-    `collation_catalog` Nullable(String),
-    `collation_schema` Nullable(String),
-    `collation_name` Nullable(String),
-    `domain_catalog` Nullable(String),
-    `domain_schema` Nullable(String),
-    `domain_name` Nullable(String),
-    `column_comment` String,
-    `column_type` String,
-    `TABLE_CATALOG` String,
-    `TABLE_SCHEMA` String,
-    `TABLE_NAME` String,
-    `COLUMN_NAME` String,
-    `ORDINAL_POSITION` UInt64,
-    `COLUMN_DEFAULT` String,
-    `IS_NULLABLE` String,
-    `DATA_TYPE` String,
-    `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),
-    `CHARACTER_OCTET_LENGTH` Nullable(UInt64),
-    `NUMERIC_PRECISION` Nullable(UInt64),
-    `NUMERIC_PRECISION_RADIX` Nullable(UInt64),
-    `NUMERIC_SCALE` Nullable(UInt64),
-    `DATETIME_PRECISION` Nullable(UInt64),
-    `CHARACTER_SET_CATALOG` Nullable(String),
-    `CHARACTER_SET_SCHEMA` Nullable(String),
-    `CHARACTER_SET_NAME` Nullable(String),
-    `COLLATION_CATALOG` Nullable(String),
-    `COLLATION_SCHEMA` Nullable(String),
-    `COLLATION_NAME` Nullable(String),
-    `DOMAIN_CATALOG` Nullable(String),
-    `DOMAIN_SCHEMA` Nullable(String),
-    `DOMAIN_NAME` Nullable(String),
-    `COLUMN_COMMENT` String,
-    `COLUMN_TYPE` String
-) AS
-SELECT
-    database AS table_catalog,
-    database AS table_schema,
-    table AS table_name,
-    name AS column_name,
-    position AS ordinal_position,
-    default_expression AS column_default,
-    type LIKE 'Nullable(%)' AS is_nullable,
-    type AS data_type,
-    character_octet_length AS character_maximum_length,
-    character_octet_length,
-    numeric_precision,
-    numeric_precision_radix,
-    numeric_scale,
-    datetime_precision,
-    NULL AS character_set_catalog,
-    NULL AS character_set_schema,
-    NULL AS character_set_name,
-    NULL AS collation_catalog,
-    NULL AS collation_schema,
-    NULL AS collation_name,
-    NULL AS domain_catalog,
-    NULL AS domain_schema,
-    NULL AS domain_name,
-    comment AS column_comment,
-    type AS column_type,
-    table_catalog AS TABLE_CATALOG,
-    table_schema AS TABLE_SCHEMA,
-    table_name AS TABLE_NAME,
-    column_name AS COLUMN_NAME,
-    ordinal_position AS ORDINAL_POSITION,
-    column_default AS COLUMN_DEFAULT,
-    is_nullable AS IS_NULLABLE,
-    data_type AS DATA_TYPE,
-    character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,
-    character_octet_length AS CHARACTER_OCTET_LENGTH,
-    numeric_precision AS NUMERIC_PRECISION,
-    numeric_precision_radix AS NUMERIC_PRECISION_RADIX,
-    numeric_scale AS NUMERIC_SCALE,
-    datetime_precision AS DATETIME_PRECISION,
-    character_set_catalog AS CHARACTER_SET_CATALOG,
-    character_set_schema AS CHARACTER_SET_SCHEMA,
-    character_set_name AS CHARACTER_SET_NAME,
-    collation_catalog AS COLLATION_CATALOG,
-    collation_schema AS COLLATION_SCHEMA,
-    collation_name AS COLLATION_NAME,
-    domain_catalog AS DOMAIN_CATALOG,
-    domain_schema AS DOMAIN_SCHEMA,
-    domain_name AS DOMAIN_NAME,
-    column_comment AS COLUMN_COMMENT,
-    column_type AS COLUMN_TYPE
-FROM system.columns
diff --git a/src/Storages/System/InformationSchema/key_column_usage.sql b/src/Storages/System/InformationSchema/key_column_usage.sql
deleted file mode 100644
index 32152886706..00000000000
--- a/src/Storages/System/InformationSchema/key_column_usage.sql
+++ /dev/null
@@ -1,54 +0,0 @@
-ATTACH VIEW key_column_usage
-    (
-     `constraint_catalog` String,
-     `constraint_schema` String,
-     `constraint_name` Nullable(String),
-     `table_catalog` String,
-     `table_schema` String,
-     `table_name` String,
-     `column_name` Nullable(String),
-     `ordinal_position` UInt32,
-     `position_in_unique_constraint` Nullable(UInt32),
-     `referenced_table_schema` Nullable(String),
-     `referenced_table_name` Nullable(String),
-     `referenced_column_name` Nullable(String),
-     `CONSTRAINT_CATALOG` Nullable(String), 
-     `CONSTRAINT_SCHEMA` Nullable(String),
-     `CONSTRAINT_NAME` Nullable(String),
-     `TABLE_CATALOG` String,
-     `TABLE_SCHEMA` String,
-     `TABLE_NAME` String,
-     `COLUMN_NAME` Nullable(String),
-     `ORDINAL_POSITION` UInt32,
-     `POSITION_IN_UNIQUE_CONSTRAINT` Nullable(UInt32),
-     `REFERENCED_TABLE_SCHEMA` Nullable(String),
-     `REFERENCED_TABLE_NAME` Nullable(String),
-     `REFERENCED_COLUMN_NAME` Nullable(String)
-) AS
-SELECT 
-    'def'                         AS constraint_catalog,
-    database                      AS constraint_schema,
-    'PRIMARY'                     AS constraint_name,
-    'def'                         AS table_catalog,
-    database                      AS table_schema,
-    table                         AS table_name,
-    name                          AS column_name,
-    1                             AS ordinal_position,
-    NULL                          AS position_in_unique_constraint,
-    NULL                          AS referenced_table_schema,
-    NULL                          AS referenced_table_name,
-    NULL                          AS referenced_column_name,
-    constraint_catalog            AS CONSTRAINT_CATALOG,
-    constraint_schema             AS CONSTRAINT_SCHEMA,
-    constraint_name               AS CONSTRAINT_NAME,
-    table_catalog                 AS TABLE_CATALOG,
-    table_schema                  AS TABLE_SCHEMA,
-    table_name                    AS TABLE_NAME,
-    column_name                   AS COLUMN_NAME,
-    ordinal_position              AS ORDINAL_POSITION,
-    position_in_unique_constraint AS POSITION_IN_UNIQUE_CONSTRAINT,
-    referenced_table_schema       AS REFERENCED_TABLE_SCHEMA,
-    referenced_table_name         AS REFERENCED_TABLE_NAME,
-    referenced_column_name        AS REFERENCED_COLUMN_NAME
-FROM system.columns
-WHERE is_in_primary_key;
diff --git a/src/Storages/System/InformationSchema/referential_constraints.sql b/src/Storages/System/InformationSchema/referential_constraints.sql
deleted file mode 100644
index 7683865a3c9..00000000000
--- a/src/Storages/System/InformationSchema/referential_constraints.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-CREATE TABLE referential_constraints
-    (
-     `constraint_catalog` String,
-     `constraint_schema` String,
-     `constraint_name` Nullable(String),
-     `unique_constraint_catalog` String,
-     `unique_constraint_schema` String,
-     `unique_constraint_name` Nullable(String),
-     `match_option` String,
-     `update_rule` String,
-     `delete_rule` String,
-     `table_name` String,
-     `referenced_table_name` String,
-     `CONSTRAINT_CATALOG` String,
-     `CONSTRAINT_SCHEMA` String,
-     `CONSTRAINT_NAME` Nullable(String),
-     `UNIQUE_CONSTRAINT_CATALOG` String,
-     `UNIQUE_CONSTRAINT_SCHEMA` String,
-     `UNIQUE_CONSTRAINT_NAME` Nullable(String),
-     `MATCH_OPTION` String,
-     `UPDATE_RULE` String,
-     `DELETE_RULE` String,
-     `TABLE_NAME` String,
-     `REFERENCED_TABLE_NAME` String
-) ENGINE Memory;
diff --git a/src/Storages/System/InformationSchema/schemata.sql b/src/Storages/System/InformationSchema/schemata.sql
deleted file mode 100644
index 887a27537e9..00000000000
--- a/src/Storages/System/InformationSchema/schemata.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-ATTACH VIEW schemata
-(
-    `catalog_name` String,
-    `schema_name` String,
-    `schema_owner` String,
-    `default_character_set_catalog` Nullable(String),
-    `default_character_set_schema` Nullable(String),
-    `default_character_set_name` Nullable(String),
-    `sql_path` Nullable(String),
-    `CATALOG_NAME` String,
-    `SCHEMA_NAME` String,
-    `SCHEMA_OWNER` String,
-    `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String),
-    `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String),
-    `DEFAULT_CHARACTER_SET_NAME` Nullable(String),
-    `SQL_PATH` Nullable(String)
-) AS
-SELECT
-    name                          AS catalog_name,
-    name                          AS schema_name,
-    'default'                     AS schema_owner,
-    NULL                          AS default_character_set_catalog,
-    NULL                          AS default_character_set_schema,
-    NULL                          AS default_character_set_name,
-    NULL                          AS sql_path,
-    catalog_name                  AS CATALOG_NAME,
-    schema_name                   AS SCHEMA_NAME,
-    schema_owner                  AS SCHEMA_OWNER,
-    default_character_set_catalog AS DEFAULT_CHARACTER_SET_CATALOG,
-    default_character_set_schema  AS DEFAULT_CHARACTER_SET_SCHEMA,
-    default_character_set_name    AS DEFAULT_CHARACTER_SET_NAME,
-    sql_path                      AS SQL_PATH
-FROM system.databases
diff --git a/src/Storages/System/InformationSchema/tables.sql b/src/Storages/System/InformationSchema/tables.sql
deleted file mode 100644
index becd609f94c..00000000000
--- a/src/Storages/System/InformationSchema/tables.sql
+++ /dev/null
@@ -1,34 +0,0 @@
-ATTACH VIEW tables
-(
-    `table_catalog` String,
-    `table_schema` String,
-    `table_name` String,
-    `table_type` String,
-    `table_collation` Nullable(String),
-    `table_comment` Nullable(String),
-    `TABLE_CATALOG` String,
-    `TABLE_SCHEMA` String,
-    `TABLE_NAME` String,
-    `TABLE_TYPE` String,
-    `TABLE_COLLATION` Nullable(String),
-    `TABLE_COMMENT` Nullable(String)
-) AS
-SELECT
-    database             AS table_catalog,
-    database             AS table_schema,
-    name                 AS table_name,
-    multiIf(is_temporary,          'LOCAL TEMPORARY', 
-            engine LIKE '%View',   'VIEW', 
-            engine LIKE 'System%', 'SYSTEM VIEW', 
-            has_own_data = 0,      'FOREIGN TABLE', 
-            'BASE TABLE'
-            )            AS table_type,
-    'utf8mb4_0900_ai_ci' AS table_collation,
-    comment              AS table_comment,
-    table_catalog        AS TABLE_CATALOG,
-    table_schema         AS TABLE_SCHEMA,
-    table_name           AS TABLE_NAME,
-    table_type           AS TABLE_TYPE,
-    table_collation      AS TABLE_COLLATION,
-    table_comment        AS TABLE_COMMENT
-FROM system.tables
diff --git a/src/Storages/System/InformationSchema/views.sql b/src/Storages/System/InformationSchema/views.sql
deleted file mode 100644
index 7c6f65e120b..00000000000
--- a/src/Storages/System/InformationSchema/views.sql
+++ /dev/null
@@ -1,46 +0,0 @@
-ATTACH VIEW views
-(
-    `table_catalog` String,
-    `table_schema` String,
-    `table_name` String,
-    `view_definition` String,
-    `check_option` String,
-    `is_updatable` Enum8('NO' = 0, 'YES' = 1),
-    `is_insertable_into` Enum8('NO' = 0, 'YES' = 1),
-    `is_trigger_updatable` Enum8('NO' = 0, 'YES' = 1),
-    `is_trigger_deletable` Enum8('NO' = 0, 'YES' = 1),
-    `is_trigger_insertable_into` Enum8('NO' = 0, 'YES' = 1),
-    `TABLE_CATALOG` String,
-    `TABLE_SCHEMA` String,
-    `TABLE_NAME` String,
-    `VIEW_DEFINITION` String,
-    `CHECK_OPTION` String,
-    `IS_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
-    `IS_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1),
-    `IS_TRIGGER_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
-    `IS_TRIGGER_DELETABLE` Enum8('NO' = 0, 'YES' = 1),
-    `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1)
-) AS
-SELECT
-    database AS table_catalog,
-    database AS table_schema,
-    name AS table_name,
-    as_select AS view_definition,
-    'NONE' AS check_option,
-    0 AS is_updatable,
-    engine = 'MaterializedView' AS is_insertable_into,
-    0 AS is_trigger_updatable,
-    0 AS is_trigger_deletable,
-    0 AS is_trigger_insertable_into,
-    table_catalog AS TABLE_CATALOG,
-    table_schema AS TABLE_SCHEMA,
-    table_name AS TABLE_NAME,
-    view_definition AS VIEW_DEFINITION,
-    check_option AS CHECK_OPTION,
-    is_updatable AS IS_UPDATABLE,
-    is_insertable_into AS IS_INSERTABLE_INTO,
-    is_trigger_updatable AS IS_TRIGGER_UPDATABLE,
-    is_trigger_deletable AS IS_TRIGGER_DELETABLE,
-    is_trigger_insertable_into AS IS_TRIGGER_INSERTABLE_INTO
-FROM system.tables
-WHERE engine LIKE '%View'
diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp
index d4775bf0d4a..48cf8c0a69f 100644
--- a/src/Storages/System/attachInformationSchemaTables.cpp
+++ b/src/Storages/System/attachInformationSchemaTables.cpp
@@ -3,21 +3,360 @@
 #include <Storages/System/attachSystemTablesImpl.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/parseQuery.h>
-#include <incbin.h>
 
-#include "config.h"
-
-/// Embedded SQL definitions
-INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");
-INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
-INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
-INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");
-INCBIN(resource_key_column_usage_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/key_column_usage.sql");
-INCBIN(resource_referential_constraints_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/referential_constraints.sql");
 
 namespace DB
 {
 
+/// Below are SQL definitions for views in "information_schema". Perhaps it would be more aesthetic to have them in .sql files
+/// and embed them here instead. In fact, it has been that way using INCBIN macros until #54773. The problem was that when
+/// existing .sql files were changed, the build system did not recognize that this source (.cpp) file changed and instead used
+/// cached object files from previous builds.
+///
+/// INCBIN is one of many libraries to embed external data. We might wait a little bit longer and try #embed (*) which should
+/// solve the problem once and for all after 40 years.
+///
+/// (*) https://thephd.dev/finally-embed-in-c23
+
+static constexpr std::string_view schemata = R"(
+    ATTACH VIEW schemata
+    (
+        `catalog_name` String,
+        `schema_name` String,
+        `schema_owner` String,
+        `default_character_set_catalog` Nullable(String),
+        `default_character_set_schema` Nullable(String),
+        `default_character_set_name` Nullable(String),
+        `sql_path` Nullable(String),
+        `CATALOG_NAME` String,
+        `SCHEMA_NAME` String,
+        `SCHEMA_OWNER` String,
+        `DEFAULT_CHARACTER_SET_CATALOG` Nullable(String),
+        `DEFAULT_CHARACTER_SET_SCHEMA` Nullable(String),
+        `DEFAULT_CHARACTER_SET_NAME` Nullable(String),
+        `SQL_PATH` Nullable(String)
+    ) AS
+    SELECT
+        name                          AS catalog_name,
+        name                          AS schema_name,
+        'default'                     AS schema_owner,
+        NULL                          AS default_character_set_catalog,
+        NULL                          AS default_character_set_schema,
+        NULL                          AS default_character_set_name,
+        NULL                          AS sql_path,
+        catalog_name                  AS CATALOG_NAME,
+        schema_name                   AS SCHEMA_NAME,
+        schema_owner                  AS SCHEMA_OWNER,
+        default_character_set_catalog AS DEFAULT_CHARACTER_SET_CATALOG,
+        default_character_set_schema  AS DEFAULT_CHARACTER_SET_SCHEMA,
+        default_character_set_name    AS DEFAULT_CHARACTER_SET_NAME,
+        sql_path                      AS SQL_PATH
+    FROM system.databases
+)";
+
+static constexpr std::string_view tables = R"(
+    ATTACH VIEW tables
+    (
+        `table_catalog` String,
+        `table_schema` String,
+        `table_name` String,
+        `table_type` String,
+        `table_collation` Nullable(String),
+        `table_comment` Nullable(String),
+        `TABLE_CATALOG` String,
+        `TABLE_SCHEMA` String,
+        `TABLE_NAME` String,
+        `TABLE_TYPE` String,
+        `TABLE_COLLATION` Nullable(String),
+        `TABLE_COMMENT` Nullable(String)
+    ) AS
+    SELECT
+        database             AS table_catalog,
+        database             AS table_schema,
+        name                 AS table_name,
+        multiIf(is_temporary,          'LOCAL TEMPORARY',
+                engine LIKE '%View',   'VIEW',
+                engine LIKE 'System%', 'SYSTEM VIEW',
+                has_own_data = 0,      'FOREIGN TABLE',
+                'BASE TABLE'
+                )            AS table_type,
+        'utf8mb4_0900_ai_ci' AS table_collation,
+        comment              AS table_comment,
+        table_catalog        AS TABLE_CATALOG,
+        table_schema         AS TABLE_SCHEMA,
+        table_name           AS TABLE_NAME,
+        table_type           AS TABLE_TYPE,
+        table_collation      AS TABLE_COLLATION,
+        table_comment        AS TABLE_COMMENT
+    FROM system.tables
+)";
+
+static constexpr std::string_view views = R"(
+    ATTACH VIEW views
+    (
+        `table_catalog` String,
+        `table_schema` String,
+        `table_name` String,
+        `view_definition` String,
+        `check_option` String,
+        `is_updatable` Enum8('NO' = 0, 'YES' = 1),
+        `is_insertable_into` Enum8('NO' = 0, 'YES' = 1),
+        `is_trigger_updatable` Enum8('NO' = 0, 'YES' = 1),
+        `is_trigger_deletable` Enum8('NO' = 0, 'YES' = 1),
+        `is_trigger_insertable_into` Enum8('NO' = 0, 'YES' = 1),
+        `TABLE_CATALOG` String,
+        `TABLE_SCHEMA` String,
+        `TABLE_NAME` String,
+        `VIEW_DEFINITION` String,
+        `CHECK_OPTION` String,
+        `IS_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
+        `IS_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1),
+        `IS_TRIGGER_UPDATABLE` Enum8('NO' = 0, 'YES' = 1),
+        `IS_TRIGGER_DELETABLE` Enum8('NO' = 0, 'YES' = 1),
+        `IS_TRIGGER_INSERTABLE_INTO` Enum8('NO' = 0, 'YES' = 1)
+    ) AS
+    SELECT
+        database AS table_catalog,
+        database AS table_schema,
+        name AS table_name,
+        as_select AS view_definition,
+        'NONE' AS check_option,
+        0 AS is_updatable,
+        engine = 'MaterializedView' AS is_insertable_into,
+        0 AS is_trigger_updatable,
+        0 AS is_trigger_deletable,
+        0 AS is_trigger_insertable_into,
+        table_catalog AS TABLE_CATALOG,
+        table_schema AS TABLE_SCHEMA,
+        table_name AS TABLE_NAME,
+        view_definition AS VIEW_DEFINITION,
+        check_option AS CHECK_OPTION,
+        is_updatable AS IS_UPDATABLE,
+        is_insertable_into AS IS_INSERTABLE_INTO,
+        is_trigger_updatable AS IS_TRIGGER_UPDATABLE,
+        is_trigger_deletable AS IS_TRIGGER_DELETABLE,
+        is_trigger_insertable_into AS IS_TRIGGER_INSERTABLE_INTO
+    FROM system.tables
+    WHERE engine LIKE '%View'
+)";
+
+static constexpr std::string_view columns = R"(
+    ATTACH VIEW columns
+    (
+        `table_catalog` String,
+        `table_schema` String,
+        `table_name` String,
+        `column_name` String,
+        `ordinal_position` UInt64,
+        `column_default` String,
+        `is_nullable` String,
+        `data_type` String,
+        `character_maximum_length` Nullable(UInt64),
+        `character_octet_length` Nullable(UInt64),
+        `numeric_precision` Nullable(UInt64),
+        `numeric_precision_radix` Nullable(UInt64),
+        `numeric_scale` Nullable(UInt64),
+        `datetime_precision` Nullable(UInt64),
+        `character_set_catalog` Nullable(String),
+        `character_set_schema` Nullable(String),
+        `character_set_name` Nullable(String),
+        `collation_catalog` Nullable(String),
+        `collation_schema` Nullable(String),
+        `collation_name` Nullable(String),
+        `domain_catalog` Nullable(String),
+        `domain_schema` Nullable(String),
+        `domain_name` Nullable(String),
+        `column_comment` String,
+        `column_type` String,
+        `TABLE_CATALOG` String,
+        `TABLE_SCHEMA` String,
+        `TABLE_NAME` String,
+        `COLUMN_NAME` String,
+        `ORDINAL_POSITION` UInt64,
+        `COLUMN_DEFAULT` String,
+        `IS_NULLABLE` String,
+        `DATA_TYPE` String,
+        `CHARACTER_MAXIMUM_LENGTH` Nullable(UInt64),
+        `CHARACTER_OCTET_LENGTH` Nullable(UInt64),
+        `NUMERIC_PRECISION` Nullable(UInt64),
+        `NUMERIC_PRECISION_RADIX` Nullable(UInt64),
+        `NUMERIC_SCALE` Nullable(UInt64),
+        `DATETIME_PRECISION` Nullable(UInt64),
+        `CHARACTER_SET_CATALOG` Nullable(String),
+        `CHARACTER_SET_SCHEMA` Nullable(String),
+        `CHARACTER_SET_NAME` Nullable(String),
+        `COLLATION_CATALOG` Nullable(String),
+        `COLLATION_SCHEMA` Nullable(String),
+        `COLLATION_NAME` Nullable(String),
+        `DOMAIN_CATALOG` Nullable(String),
+        `DOMAIN_SCHEMA` Nullable(String),
+        `DOMAIN_NAME` Nullable(String),
+        `COLUMN_COMMENT` String,
+        `COLUMN_TYPE` String
+    ) AS
+    SELECT
+        database AS table_catalog,
+        database AS table_schema,
+        table AS table_name,
+        name AS column_name,
+        position AS ordinal_position,
+        default_expression AS column_default,
+        type LIKE 'Nullable(%)' AS is_nullable,
+        type AS data_type,
+        character_octet_length AS character_maximum_length,
+        character_octet_length,
+        numeric_precision,
+        numeric_precision_radix,
+        numeric_scale,
+        datetime_precision,
+        NULL AS character_set_catalog,
+        NULL AS character_set_schema,
+        NULL AS character_set_name,
+        NULL AS collation_catalog,
+        NULL AS collation_schema,
+        NULL AS collation_name,
+        NULL AS domain_catalog,
+        NULL AS domain_schema,
+        NULL AS domain_name,
+        comment AS column_comment,
+        type AS column_type,
+        table_catalog AS TABLE_CATALOG,
+        table_schema AS TABLE_SCHEMA,
+        table_name AS TABLE_NAME,
+        column_name AS COLUMN_NAME,
+        ordinal_position AS ORDINAL_POSITION,
+        column_default AS COLUMN_DEFAULT,
+        is_nullable AS IS_NULLABLE,
+        data_type AS DATA_TYPE,
+        character_maximum_length AS CHARACTER_MAXIMUM_LENGTH,
+        character_octet_length AS CHARACTER_OCTET_LENGTH,
+        numeric_precision AS NUMERIC_PRECISION,
+        numeric_precision_radix AS NUMERIC_PRECISION_RADIX,
+        numeric_scale AS NUMERIC_SCALE,
+        datetime_precision AS DATETIME_PRECISION,
+        character_set_catalog AS CHARACTER_SET_CATALOG,
+        character_set_schema AS CHARACTER_SET_SCHEMA,
+        character_set_name AS CHARACTER_SET_NAME,
+        collation_catalog AS COLLATION_CATALOG,
+        collation_schema AS COLLATION_SCHEMA,
+        collation_name AS COLLATION_NAME,
+        domain_catalog AS DOMAIN_CATALOG,
+        domain_schema AS DOMAIN_SCHEMA,
+        domain_name AS DOMAIN_NAME,
+        column_comment AS COLUMN_COMMENT,
+        column_type AS COLUMN_TYPE
+    FROM system.columns
+)";
+
+static constexpr std::string_view key_column_usage = R"(
+    ATTACH VIEW key_column_usage
+        (
+         `constraint_catalog` String,
+         `constraint_schema` String,
+         `constraint_name` Nullable(String),
+         `table_catalog` String,
+         `table_schema` String,
+         `table_name` String,
+         `column_name` Nullable(String),
+         `ordinal_position` UInt32,
+         `position_in_unique_constraint` Nullable(UInt32),
+         `referenced_table_schema` Nullable(String),
+         `referenced_table_name` Nullable(String),
+         `referenced_column_name` Nullable(String),
+         `CONSTRAINT_CATALOG` Nullable(String),
+         `CONSTRAINT_SCHEMA` Nullable(String),
+         `CONSTRAINT_NAME` Nullable(String),
+         `TABLE_CATALOG` String,
+         `TABLE_SCHEMA` String,
+         `TABLE_NAME` String,
+         `COLUMN_NAME` Nullable(String),
+         `ORDINAL_POSITION` UInt32,
+         `POSITION_IN_UNIQUE_CONSTRAINT` Nullable(UInt32),
+         `REFERENCED_TABLE_SCHEMA` Nullable(String),
+         `REFERENCED_TABLE_NAME` Nullable(String),
+         `REFERENCED_COLUMN_NAME` Nullable(String)
+    ) AS
+    SELECT
+        'def'                         AS constraint_catalog,
+        database                      AS constraint_schema,
+        'PRIMARY'                     AS constraint_name,
+        'def'                         AS table_catalog,
+        database                      AS table_schema,
+        table                         AS table_name,
+        name                          AS column_name,
+        1                             AS ordinal_position,
+        NULL                          AS position_in_unique_constraint,
+        NULL                          AS referenced_table_schema,
+        NULL                          AS referenced_table_name,
+        NULL                          AS referenced_column_name,
+        constraint_catalog            AS CONSTRAINT_CATALOG,
+        constraint_schema             AS CONSTRAINT_SCHEMA,
+        constraint_name               AS CONSTRAINT_NAME,
+        table_catalog                 AS TABLE_CATALOG,
+        table_schema                  AS TABLE_SCHEMA,
+        table_name                    AS TABLE_NAME,
+        column_name                   AS COLUMN_NAME,
+        ordinal_position              AS ORDINAL_POSITION,
+        position_in_unique_constraint AS POSITION_IN_UNIQUE_CONSTRAINT,
+        referenced_table_schema       AS REFERENCED_TABLE_SCHEMA,
+        referenced_table_name         AS REFERENCED_TABLE_NAME,
+        referenced_column_name        AS REFERENCED_COLUMN_NAME
+    FROM system.columns
+    WHERE is_in_primary_key;
+)";
+
+static constexpr std::string_view referential_constraints = R"(
+    ATTACH VIEW referential_constraints
+        (
+         `constraint_catalog` String,
+         `constraint_schema` String,
+         `constraint_name` Nullable(String),
+         `unique_constraint_catalog` String,
+         `unique_constraint_schema` String,
+         `unique_constraint_name` Nullable(String),
+         `match_option` String,
+         `update_rule` String,
+         `delete_rule` String,
+         `table_name` String,
+         `referenced_table_name` String,
+         `CONSTRAINT_CATALOG` String,
+         `CONSTRAINT_SCHEMA` String,
+         `CONSTRAINT_NAME` Nullable(String),
+         `UNIQUE_CONSTRAINT_CATALOG` String,
+         `UNIQUE_CONSTRAINT_SCHEMA` String,
+         `UNIQUE_CONSTRAINT_NAME` Nullable(String),
+         `MATCH_OPTION` String,
+         `UPDATE_RULE` String,
+         `DELETE_RULE` String,
+         `TABLE_NAME` String,
+         `REFERENCED_TABLE_NAME` String
+    ) AS
+    SELECT
+        ''                        AS constraint_catalog,
+        NULL                      AS constraint_name,
+        ''                        AS constraint_schema,
+        ''                        AS unique_constraint_catalog,
+        NULL                      AS unique_constraint_name,
+        ''                        AS unique_constraint_schema,
+        ''                        AS match_option,
+        ''                        AS update_rule,
+        ''                        AS delete_rule,
+        ''                        AS table_name,
+        ''                        AS referenced_table_name,
+        constraint_catalog        AS CONSTRAINT_CATALOG,
+        constraint_name           AS CONSTRAINT_NAME,
+        constraint_schema         AS CONSTRAINT_SCHEMA,
+        unique_constraint_catalog AS UNIQUE_CONSTRAINT_CATALOG,
+        unique_constraint_name    AS UNIQUE_CONSTRAINT_NAME,
+        unique_constraint_schema  AS UNIQUE_CONSTRAINT_SCHEMA,
+        match_option              AS MATCH_OPTION,
+        update_rule               AS UPDATE_RULE,
+        delete_rule               AS DELETE_RULE,
+        table_name                AS TABLE_NAME,
+        referenced_table_name     AS REFERENCED_TABLE_NAME
+    WHERE false; -- make sure this view is always empty
+)";
+
 /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
 
 static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
@@ -63,12 +402,12 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
 
 void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
 {
-    createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
-    createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
-    createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
-    createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
-    createInformationSchemaView(context, information_schema_database, "key_column_usage", std::string_view(reinterpret_cast<const char *>(gresource_key_column_usage_sqlData), gresource_key_column_usage_sqlSize));
-    createInformationSchemaView(context, information_schema_database, "referential_constraints", std::string_view(reinterpret_cast<const char *>(gresource_referential_constraints_sqlData), gresource_referential_constraints_sqlSize));
+    createInformationSchemaView(context, information_schema_database, "schemata", schemata);
+    createInformationSchemaView(context, information_schema_database, "tables", tables);
+    createInformationSchemaView(context, information_schema_database, "views", views);
+    createInformationSchemaView(context, information_schema_database, "columns", columns);
+    createInformationSchemaView(context, information_schema_database, "key_column_usage", key_column_usage);
+    createInformationSchemaView(context, information_schema_database, "referential_constraints", referential_constraints);
 }
 
 }

From 8339c33273dec9c4b1e3dea5deefb00b853ba13c Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Thu, 21 Sep 2023 18:39:02 +0000
Subject: [PATCH 219/243] Try to fix clang-tidy issue with
 -Wunused-command-line-argument (didn't reproduce locally)

---
 .clang-tidy | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.clang-tidy b/.clang-tidy
index cbeac377079..afc2ca04e2e 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -168,6 +168,7 @@ ExtraArgs:
 # This is technically a compiler error, not a clang-tidy error. We could litter the code base with more pragmas that suppress
 # this error but it is better to pass the following flag to the compiler:
 - '-Wno-unknown-pragmas'
+- '-Wno-unused-command-line-argument' # similar issue
 
 CheckOptions:
   readability-identifier-naming.ClassCase: CamelCase

From 01b8825733c0bb8fcde6bd32341c6d7f0775cefb Mon Sep 17 00:00:00 2001
From: slvrtrn <hypnoash@gmail.com>
Date: Thu, 21 Sep 2023 20:55:08 +0200
Subject: [PATCH 220/243] Fix 01161_information_schema test

---
 tests/queries/0_stateless/01161_information_schema.reference | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference
index 8139d327e31..7531af6a365 100644
--- a/tests/queries/0_stateless/01161_information_schema.reference
+++ b/tests/queries/0_stateless/01161_information_schema.reference
@@ -53,5 +53,3 @@ default	default	v	f	2		0	Float64	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N		F
 def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu	i	1	\N	\N	\N	\N
 def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
 def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	d	1	\N	\N	\N	\N
-def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N	def	default	PRIMARY	def	default	kcu2	u	1	\N	\N	\N	\N

From 99c1d7660423a67abfa5690651b60f36c5713c95 Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Thu, 21 Sep 2023 15:55:42 -0400
Subject: [PATCH 221/243] Fix division by zero in StorageS3

---
 src/Storages/StorageS3.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index ec9cf84ce4b..42c62a3e8f8 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1085,7 +1085,7 @@ Pipe StorageS3::read(
         && local_context->getSettingsRef().optimize_count_from_files;
 
     const size_t max_threads = local_context->getSettingsRef().max_threads;
-    const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / num_streams);
+    const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul));
     LOG_DEBUG(&Poco::Logger::get("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads);
 
     pipes.reserve(num_streams);

From 2eb63649bdd884293cc604eb3534e6ea1aea1ad4 Mon Sep 17 00:00:00 2001
From: zvonand <azvonov@altinity.com>
Date: Fri, 22 Sep 2023 00:30:46 +0200
Subject: [PATCH 222/243] fix old workflows

---
 src/Storages/HDFS/StorageHDFS.cpp | 42 ++++++++------------------
 src/Storages/StorageFile.cpp      | 49 ++++++++++---------------------
 2 files changed, 29 insertions(+), 62 deletions(-)

diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp
index 8563a777bb2..ac2635aafaf 100644
--- a/src/Storages/HDFS/StorageHDFS.cpp
+++ b/src/Storages/HDFS/StorageHDFS.cpp
@@ -137,41 +137,25 @@ namespace
         const HDFSFSPtr & fs,
         const String & for_match)
     {
+        /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
+        static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");
+
+        std::string_view for_match_view(for_match);
+        std::string_view matched;
+        if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched))
+        {
+            std::string buffer(matched);
+            if (buffer.find(',') != std::string::npos)
+                return expandSelector(path_for_ls, fs, for_match);
+        }
+
         const size_t first_glob_pos = for_match.find_first_of("*?{");
-        const bool has_glob = first_glob_pos != std::string::npos;
 
         const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
         const String suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
         const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/'
 
-        bool has_generator = false;
-        bool range_generator = false;
-
-        const size_t next_slash_after_glob_pos = [&]()
-        {
-            if (!has_glob)
-                return suffix_with_globs.find('/', 1);
-
-            bool prev_is_dot = false;
-
-            for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
-            {
-                if (*it == '{')
-                    has_generator = true;
-                else if (*it == '/')
-                    return size_t(std::distance(suffix_with_globs.begin(), it));
-                else if (*it == '.')
-                {
-                    if (prev_is_dot)
-                        range_generator = true;
-                    prev_is_dot = true;
-                }
-            }
-            return std::string::npos;
-        }();
-
-        if (has_generator && !range_generator)
-            return expandSelector(path_for_ls, fs, for_match);
+        const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1);
 
         const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
 
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index eb18842fdaa..5b05dfa687f 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -106,7 +106,6 @@ namespace ErrorCodes
 
 namespace
 {
-
 /// Forward-declare to use in expandSelector()
 void listFilesWithRegexpMatchingImpl(
     const std::string & path_for_ls,
@@ -171,43 +170,27 @@ void listFilesWithRegexpMatchingImpl(
     std::vector<std::string> & result,
     bool recursive)
 {
+    /// regexp for {expr1,expr2,expr3} or {M..N}, where M and N - non-negative integers, expr's should be without "{", "}", "*" and ","
+    static const re2::RE2 enum_or_range(R"({([\d]+\.\.[\d]+|[^{}*,]+,[^{}*]*[^{}*,])})");
+
+    std::string_view for_match_view(for_match);
+    std::string_view matched;
+    if (RE2::FindAndConsume(&for_match_view, enum_or_range, &matched))
+    {
+        std::string buffer(matched);
+        if (buffer.find(',') != std::string::npos)
+        {
+            expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive);
+            return;
+        }
+    }
+
     const size_t first_glob_pos = for_match.find_first_of("*?{");
-    const bool has_glob = first_glob_pos != std::string::npos;
 
     const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/');
     const std::string suffix_with_globs = for_match.substr(end_of_path_without_globs);   /// begin with '/'
 
-    bool has_generator = false;
-    bool range_generator = false;
-
-    const size_t next_slash_after_glob_pos = [&]()
-    {
-        if (!has_glob)
-            return suffix_with_globs.find('/', 1);
-
-        bool prev_is_dot = false;
-
-        for (std::string::const_iterator it = ++suffix_with_globs.begin(); it != suffix_with_globs.end(); it++)
-        {
-            if (*it == '{')
-                has_generator = true;
-            else if (*it == '/')
-                return size_t(std::distance(suffix_with_globs.begin(), it));
-            else if (*it == '.')
-            {
-                if (prev_is_dot)
-                    range_generator = true;
-                prev_is_dot = true;
-            }
-        }
-        return std::string::npos;
-    }();
-
-    if (has_generator && !range_generator)
-    {
-        expandSelector(path_for_ls, for_match, total_bytes_to_read, result, recursive);
-        return;
-    }
+    const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1);
 
     const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos);
 

From 31e6893e01d3bbfbe15e0b888a4ba29852fc44a3 Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Fri, 22 Sep 2023 09:02:09 +0000
Subject: [PATCH 223/243] adjust path/path-ignore in docs and pr workflows

---
 .github/workflows/docs_check.yml   | 4 +---
 .github/workflows/pull_request.yml | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml
index a6be21fb14a..f41bc23bc22 100644
--- a/.github/workflows/docs_check.yml
+++ b/.github/workflows/docs_check.yml
@@ -13,9 +13,7 @@ on:  # yamllint disable-line rule:truthy
     branches:
       - master
     paths:
-      - 'CHANGELOG.md'
-      - 'README.md'
-      - 'SECURITY.md'
+      - '**.md'
       - 'docker/docs/**'
       - 'docs/**'
       - 'utils/check-style/aspell-ignore/**'
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index ea937eb040f..bbc58e17f9b 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -13,9 +13,7 @@ on:  # yamllint disable-line rule:truthy
     branches:
       - master
     paths-ignore:
-      - 'CHANGELOG.md'
-      - 'README.md'
-      - 'SECURITY.md'
+      - '**.md'
       - 'docker/docs/**'
       - 'docs/**'
       - 'utils/check-style/aspell-ignore/**'

From 2d36a02ff102f0d714bd2894aa14ce7bf4d9cf90 Mon Sep 17 00:00:00 2001
From: Max Kainov <max.kainov@clickhouse.com>
Date: Fri, 22 Sep 2023 09:23:05 +0000
Subject: [PATCH 224/243] minor md fixes to test wf

---
 tests/config/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/config/README.md b/tests/config/README.md
index 8dd775a275a..5c65a967183 100644
--- a/tests/config/README.md
+++ b/tests/config/README.md
@@ -1,8 +1,8 @@
 # ClickHouse configs for test environment
 
 ## How to use
-CI use these configs in all checks installing them with `install.sh` script. If you want to run all tests from `tests/queries/0_stateless` and `test/queries/1_stateful` on your local machine you have to set up configs from this directory for your `clickhouse-server`. The most simple way is to install them using `install.sh` script. Other option is just copy files into your clickhouse config directory.
+CI use these configs in all checks installing them with `install.sh` script. If you want to run all tests from `tests/queries/0_stateless` and `test/queries/1_stateful` on your local machine you have to set up configs from this directory for your `clickhouse-server`. The easiest way is to install them using `install.sh` script. Another option is to copy files into your clickhouse config directory.
 
 ## How to add new config
 
-Just place file `.xml` with new config into appropriate directory and add `ln` command into `install.sh` script. After that CI will use this config in all tests runs.
+Place file `.xml` with new config in the appropriate directory and add `ln` command into `install.sh` script. CI will use then this config in all test runs.

From fea886907d1e5a0544a4f560336e9850b02e5bbb Mon Sep 17 00:00:00 2001
From: Victor Krasnov <v.krasnov@arenadata.io>
Date: Fri, 22 Sep 2023 09:54:33 +0000
Subject: [PATCH 225/243] Fix data race during backup_log initialization

---
 src/Backups/BackupsWorker.cpp | 4 ++--
 src/Backups/BackupsWorker.h   | 2 +-
 src/Interpreters/Context.cpp  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp
index e5cd905fbd1..b554078bb22 100644
--- a/src/Backups/BackupsWorker.cpp
+++ b/src/Backups/BackupsWorker.cpp
@@ -218,20 +218,20 @@ namespace
 }
 
 
-BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_)
+BackupsWorker::BackupsWorker(ContextPtr global_context, size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_)
     : backups_thread_pool(std::make_unique<ThreadPool>(CurrentMetrics::BackupsThreads, CurrentMetrics::BackupsThreadsActive, num_backup_threads, /* max_free_threads = */ 0, num_backup_threads))
     , restores_thread_pool(std::make_unique<ThreadPool>(CurrentMetrics::RestoreThreads, CurrentMetrics::RestoreThreadsActive, num_restore_threads, /* max_free_threads = */ 0, num_restore_threads))
     , log(&Poco::Logger::get("BackupsWorker"))
     , allow_concurrent_backups(allow_concurrent_backups_)
     , allow_concurrent_restores(allow_concurrent_restores_)
 {
+    backup_log = global_context->getBackupLog();
     /// We set max_free_threads = 0 because we don't want to keep any threads if there is no BACKUP or RESTORE query running right now.
 }
 
 
 OperationID BackupsWorker::start(const ASTPtr & backup_or_restore_query, ContextMutablePtr context)
 {
-    backup_log = context->getBackupLog();
     const ASTBackupQuery & backup_query = typeid_cast<const ASTBackupQuery &>(*backup_or_restore_query);
     if (backup_query.kind == ASTBackupQuery::Kind::BACKUP)
         return startMakingBackup(backup_or_restore_query, context);
diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h
index e7207cdcbd2..3ed96af0f04 100644
--- a/src/Backups/BackupsWorker.h
+++ b/src/Backups/BackupsWorker.h
@@ -32,7 +32,7 @@ class BackupLog;
 class BackupsWorker
 {
 public:
-    BackupsWorker(size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_);
+    BackupsWorker(ContextPtr global_context, size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_);
 
     /// Waits until all tasks have been completed.
     void shutdown();
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a1bc6cf94ee..1219b29050e 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2224,7 +2224,7 @@ BackupsWorker & Context::getBackupsWorker() const
     UInt64 restore_threads = config.getUInt64("restore_threads", settings_ref.restore_threads);
 
     if (!shared->backups_worker)
-        shared->backups_worker.emplace(backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
+        shared->backups_worker.emplace(getGlobalContext(), backup_threads, restore_threads, allow_concurrent_backups, allow_concurrent_restores);
 
     return *shared->backups_worker;
 }

From 7b585da613c96c714a7a91e6d6e70d7bbe4bb30e Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Fri, 22 Sep 2023 10:12:11 +0000
Subject: [PATCH 226/243] Fix clang-tidy

---
 utils/self-extracting-executable/compressor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/self-extracting-executable/compressor.cpp b/utils/self-extracting-executable/compressor.cpp
index 708ea535128..642a4f1ab8e 100644
--- a/utils/self-extracting-executable/compressor.cpp
+++ b/utils/self-extracting-executable/compressor.cpp
@@ -597,7 +597,7 @@ int main(int argc, char* argv[])
     std::cout << "Compression with level: " << level << std::endl;
     if (0 != compressFiles(out_name, exec, &argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
     {
-        printf("Compression failed.\n"); // NOLING(modernize-use-std-print)
+        printf("Compression failed.\n"); // NOLINT(modernize-use-std-print)
         if (0 != close(output_fd))
             perror("close");
         unlink(argv[start_of_files - 1]);

From 0554cdca65de8f7491c477ac87d2dbf14245d250 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Fri, 22 Sep 2023 11:02:40 +0000
Subject: [PATCH 227/243] Try to stabilize test

---
 .../queries/0_stateless/01161_information_schema.sql | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql
index 2ccdddc7a9e..98a1113a786 100644
--- a/tests/queries/0_stateless/01161_information_schema.sql
+++ b/tests/queries/0_stateless/01161_information_schema.sql
@@ -24,13 +24,13 @@ SELECT * FROM information_schema.views WHERE table_schema = currentDatabase();
 SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%';
 
 -- mixed upper/lowercase schema and table name:
-SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't';
-SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase() AND table_name = 't';
-SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE }
-SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE }
+SELECT count() FROM information_schema.TABLES WHERE table_schema = currentDatabase() AND table_name = 't';
+SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema = currentDatabase() AND table_name = 't';
+SELECT count() FROM INFORMATION_schema.tables WHERE table_schema = currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE }
+SELECT count() FROM information_schema.taBLES WHERE table_schema  =currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE }
 
-SELECT * FROM information_schema.key_column_usage WHERE table_name = 'kcu';
-SELECT * FROM information_schema.key_column_usage WHERE table_name = 'kcu2';
+SELECT * FROM information_schema.key_column_usage WHERE table_schema = currentDatabase() AND table_name = 'kcu';
+SELECT * FROM information_schema.key_column_usage WHERE table_schema = currentDatabase() AND table_name = 'kcu2';
 
 SELECT * FROM information_schema.referential_constraints;
 

From 7f82765787c876e7d4d07e7ac285657e693dfa17 Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Fri, 22 Sep 2023 11:52:02 +0000
Subject: [PATCH 228/243] Some fixups

---
 src/DataTypes/IDataType.h                     | 28 +++++++-----------
 src/Functions/DateTimeTransforms.h            | 15 +++++-----
 src/Functions/toDaysSinceYearZero.cpp         | 29 +++++++------------
 .../02874_toDaysSinceYearZero.reference       |  8 ++---
 .../0_stateless/02874_toDaysSinceYearZero.sql |  8 ++---
 5 files changed, 34 insertions(+), 54 deletions(-)

diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index 54cb3d0d5c2..782dce116a4 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -371,9 +371,11 @@ struct WhichDataType
 
     constexpr bool isDate() const { return idx == TypeIndex::Date; }
     constexpr bool isDate32() const { return idx == TypeIndex::Date32; }
+    constexpr bool isDateOrDate32() const { return isDate() || isDate32(); }
     constexpr bool isDateTime() const { return idx == TypeIndex::DateTime; }
     constexpr bool isDateTime64() const { return idx == TypeIndex::DateTime64; }
-    constexpr bool isDateOrDate32() const { return isDate() || isDate32(); }
+    constexpr bool isDateTimeOrDateTime64() const { return isDateTime() || isDateTime64(); }
+    constexpr bool isDateOrDate32OrDateTimeOrDateTime64() const { return isDateOrDate32() || isDateTimeOrDateTime64(); }
 
     constexpr bool isString() const { return idx == TypeIndex::String; }
     constexpr bool isFixedString() const { return idx == TypeIndex::FixedString; }
@@ -410,6 +412,10 @@ template <typename T>
 inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); }
 template <typename T>
 inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); }
+template <typename T>
+inline bool isDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTimeOrDateTime64(); }
+template <typename T>
+inline bool isDateOrDate32OrDateTimeOrDateTime64(const T & data_type) { return WhichDataType(data_type).isDateOrDate32OrDateTimeOrDateTime64(); }
 
 template <typename T>
 inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
@@ -433,26 +439,14 @@ template <typename T>
 inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); }
 
 template <typename T>
-inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject();
-}
+inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); }
 
 template <typename T>
-inline bool isUInt8(const T & data_type)
-{
-    return WhichDataType(data_type).isUInt8();
-}
-
+inline bool isUInt8(const T & data_type) { return WhichDataType(data_type).isUInt8(); }
 template <typename T>
-inline bool isUInt64(const T & data_type)
-{
-    return WhichDataType(data_type).isUInt64();
-}
-
+inline bool isUInt64(const T & data_type) { return WhichDataType(data_type).isUInt64(); }
 template <typename T>
-inline bool isUnsignedInteger(const T & data_type)
-{
-    return WhichDataType(data_type).isUInt();
-}
+inline bool isUnsignedInteger(const T & data_type) { return WhichDataType(data_type).isUInt(); }
 
 template <typename T>
 inline bool isInteger(const T & data_type)
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index d74eefc70d8..ae8598fe9ea 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -928,19 +928,23 @@ struct ToDayOfYearImpl
 struct ToDaysSinceYearZeroImpl
 {
 private:
-    /// Constant is taken from Java LocalDate implementation
-    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1970 = 719'528; /// 01 January, each
+    static constexpr auto DAYS_BETWEEN_YEARS_0_AND_1970 = 719'528; /// 01 January, each. Constant taken from Java LocalDate. Consistent with MySQL's TO_DAYS().
+    static constexpr auto SECONDS_PER_DAY = 60 * 60 * 24;
 
 public:
     static constexpr auto name = "toDaysSinceYearZero";
 
+    static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
+    {
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t.whole));
+    }
     static UInt32 execute(Int64, const DateLUTImpl &)
     {
         throwDateTimeIsNotSupported(name);
     }
     static UInt32 execute(UInt32 d, const DateLUTImpl &)
     {
-        return DAYS_BETWEEN_YEARS_0_AND_1970 + (d / 86400);
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + d / SECONDS_PER_DAY;
     }
     static UInt32 execute(Int32 d, const DateLUTImpl &)
     {
@@ -950,11 +954,6 @@ public:
     {
         return DAYS_BETWEEN_YEARS_0_AND_1970 + d;
     }
-    static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
-    {
-        return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t.whole));
-    }
-
     static constexpr bool hasPreimage() { return false; }
 
     using FactorTransform = ZeroTransform;
diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp
index abc2b73f31d..6688c5edcc1 100644
--- a/src/Functions/toDaysSinceYearZero.cpp
+++ b/src/Functions/toDaysSinceYearZero.cpp
@@ -25,8 +25,6 @@ namespace
 class FunctionToDaysSinceYearZero : public IFunction
 {
     using ResultType = DataTypeUInt32;
-    using Transformer = TransformDateTime64<ToDaysSinceYearZeroImpl>;
-
 public:
     static constexpr auto name = "toDaysSinceYearZero";
     static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionToDaysSinceYearZero>(context); }
@@ -41,14 +39,12 @@ public:
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
         FunctionArgumentDescriptors mandatory_args{
-            {"date",
-             [](const IDataType & dt) { return isDateOrDate32<IDataType>(dt) || isDateTime<IDataType>(dt) || isDateTime64<IDataType>(dt); },
-             nullptr,
-             "Date, Date32, DateTime or DateTime64"}};
+            {"date", &isDateOrDate32OrDateTimeOrDateTime64<IDataType>, nullptr, "Date or Date32 or DateTime or DateTime64"}
+        };
 
         validateFunctionArgumentTypes(*this, arguments, mandatory_args);
 
-        return std::make_shared<DataTypeUInt32>();
+        return std::make_shared<ResultType>();
     }
 
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@@ -57,24 +53,19 @@ public:
         WhichDataType which(from_type);
 
         if (which.isDate())
-            return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(
-                arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
         else if (which.isDate32())
-            return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(
-                arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
         else if (which.isDateTime())
-            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(
-                arguments, result_type, input_rows_count);
+            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
         else if (which.isDateTime64())
         {
             const auto scale = static_cast<const DataTypeDateTime64 *>(from_type)->getScale();
-            const Transformer transformer(scale);
-            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, Transformer>::execute(
-                arguments, result_type, input_rows_count, transformer);
+            const TransformDateTime64<ToDaysSinceYearZeroImpl> transformer(scale);
+            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, decltype(transformer)>::execute(arguments, result_type, input_rows_count, transformer);
         }
 
-        throw Exception(
-            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+        throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
             "Illegal type {} of argument of function {}",
             arguments[0].type->getName(),
             this->getName());
@@ -87,7 +78,7 @@ REGISTER_FUNCTION(ToDaysSinceYearZero)
 {
     factory.registerFunction<FunctionToDaysSinceYearZero>(FunctionDocumentation{
         .description = R"(
-Returns for a given date, the number of days passed since 1 January 0000 in the proleptic Gregorian calendar defined by ISO 8601.
+Returns for a given date or date with time, the number of days passed since 1 January 0000 in the proleptic Gregorian calendar defined by ISO 8601.
 The calculation is the same as in MySQL's TO_DAYS() function.
 )",
         .examples{{"typical", "SELECT toDaysSinceYearZero(toDate('2023-09-08'))", "713569"}},
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
index 1a5fd5695f1..b74d4a369a1 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
@@ -4,11 +4,11 @@ Const argument
 739136
 693961
 739136
-739136
-739136
-739136
+719528
 739136
 693961
+739136
+739136
 \N
 Non-const argument
 739136
@@ -16,8 +16,6 @@ Non-const argument
 739136
 739136
 739136
-739136
-693961
 MySQL alias
 739136
 739136
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
index 99bc507d311..d10674620ec 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
@@ -9,11 +9,11 @@ SELECT toDaysSinceYearZero(toDate('1970-01-01'));
 SELECT toDaysSinceYearZero(toDate('2023-09-08'));
 SELECT toDaysSinceYearZero(toDate32('1900-01-01'));
 SELECT toDaysSinceYearZero(toDate32('2023-09-08'));
+SELECT toDaysSinceYearZero(toDateTime('1970-01-01 00:00:00'));
 SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11'));
-SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123', 3));
-SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123', 6));
-SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123123', 9));
 SELECT toDaysSinceYearZero(toDateTime64('1900-01-01 00:00:00.000', 3));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123', 3));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123123', 9));
 SELECT toDaysSinceYearZero(NULL);
 
 SELECT 'Non-const argument';
@@ -21,9 +21,7 @@ SELECT toDaysSinceYearZero(materialize(toDate('2023-09-08')));
 SELECT toDaysSinceYearZero(materialize(toDate32('2023-09-08')));
 SELECT toDaysSinceYearZero(materialize(toDateTime('2023-09-08 11:11:11')));
 SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123', 3)));
-SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123123', 6)));
 SELECT toDaysSinceYearZero(materialize(toDateTime64('2023-09-08 11:11:11.123123123', 9)));
-SELECT toDaysSinceYearZero(materialize(toDateTime64('1900-01-01 00:00:00.000', 3)));
 
 SELECT 'MySQL alias';
 SELECT to_days(toDate('2023-09-08'));

From 67130c8981bb3eacf62220f748a1a4b1a9cf48db Mon Sep 17 00:00:00 2001
From: Christian Clauss <cclauss@me.com>
Date: Fri, 22 Sep 2023 14:04:06 +0200
Subject: [PATCH 229/243] Update CHANGELOG.md: Fix typos to test

Tests #54914

% `pipx run codespell`
```
./CHANGELOG.md:31: controled ==> controlled
./CHANGELOG.md:58: functins ==> functions
./CHANGELOG.md:538: Impove ==> Improve
./CHANGELOG.md:583: expection ==> exception, expectation
./CHANGELOG.md:602: auxillary ==> auxiliary
./CHANGELOG.md:658: occassional ==> occasional
./CHANGELOG.md:1362: ouptut ==> output
```
---
 CHANGELOG.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 30ac45ced03..4b0c8e61a0d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,10 +25,10 @@
 * Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
 * Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
 * Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without table name in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)).
-* A system table to monitor kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)).
+* A system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)).
 * Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
 * New functions `toUTCTimestamp/fromUTCTimestamp` to act same as spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)).
-* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/outoput using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert ClickHouse table structure to CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without external format schema using autogenerated schema from table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export autogenerated schema while input/output using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
 * A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
 * Add new function `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
 * Allow variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
@@ -55,7 +55,7 @@
 * Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)).
 * Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)).
 * Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)).
-* Use filter by file/path before reading in `url`/`file`/`hdfs` table functins. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)).
+* Use filter by file/path before reading in `url`/`file`/`hdfs` table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)).
 * Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)).
 * Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)).
 * Speed up merging of states of `uniq` and `uniqExact` aggregate functions by parallelizing conversion before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)).
@@ -535,7 +535,7 @@
 * Add MemoryTracker for the background tasks (merges and mutation). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached ClickHouse won't schedule new merge or mutation tasks. Also `MergesMutationsMemoryTracking` metric is introduced to allow observing current memory usage of background tasks. Resubmit [#46089](https://github.com/ClickHouse/ClickHouse/issues/46089). Closes [#48774](https://github.com/ClickHouse/ClickHouse/issues/48774). [#48787](https://github.com/ClickHouse/ClickHouse/pull/48787) ([Dmitry Novik](https://github.com/novikd)).
 * Function `dotProduct` work for array. [#49050](https://github.com/ClickHouse/ClickHouse/pull/49050) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
 * Support statement `SHOW INDEX` to improve compatibility with MySQL. [#49158](https://github.com/ClickHouse/ClickHouse/pull/49158) ([Robert Schulze](https://github.com/rschu1ze)).
-* Add virtual column `_file` and `_path` support to table function `url`. - Impove error message for table function `url`. - resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)).
+* Add virtual column `_file` and `_path` support to table function `url`. - Improve error message for table function `url`. - resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)).
 * Adding the `grants` field in the users.xml file, which allows specifying grants for users. [#49381](https://github.com/ClickHouse/ClickHouse/pull/49381) ([pufit](https://github.com/pufit)).
 * Support full/right join by using grace hash join algorithm. [#49483](https://github.com/ClickHouse/ClickHouse/pull/49483) ([lgbo](https://github.com/lgbo-ustc)).
 * `WITH FILL` modifier groups filling by sorting prefix. Controlled by `use_with_fill_by_sorting_prefix` setting (enabled by default). Related to [#33203](https://github.com/ClickHouse/ClickHouse/issues/33203)#issuecomment-1418736794. [#49503](https://github.com/ClickHouse/ClickHouse/pull/49503) ([Igor Nikonov](https://github.com/devcrafter)).
@@ -580,7 +580,7 @@
 * `DEFLATE_QPL` codec lower the minimum simd version to SSE 4.2. [doc change in qpl](https://github.com/intel/qpl/commit/3f8f5cea27739f5261e8fd577dc233ffe88bf679) - Intel® QPL relies on a run-time kernels dispatcher and cpuid check to choose the best available implementation(sse/avx2/avx512) - restructured cmakefile for qpl build in clickhouse to align with latest upstream qpl. [#49811](https://github.com/ClickHouse/ClickHouse/pull/49811) ([jasperzhu](https://github.com/jinjunzh)).
 * Add initial support to do JOINs with pure parallel replicas. [#49544](https://github.com/ClickHouse/ClickHouse/pull/49544) ([Raúl Marín](https://github.com/Algunenano)).
 * More parallelism on `Outdated` parts removal with "zero-copy replication". [#49630](https://github.com/ClickHouse/ClickHouse/pull/49630) ([Alexander Tokmakov](https://github.com/tavplubix)).
-* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` have 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an expection in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` have 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an exception in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 * When parallel replicas are enabled they will always skip unavailable servers (the behavior is controlled by the setting `skip_unavailable_shards`, enabled by default and can be only disabled). This closes: [#48565](https://github.com/ClickHouse/ClickHouse/issues/48565). [#50293](https://github.com/ClickHouse/ClickHouse/pull/50293) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
 
 #### Improvement
@@ -599,7 +599,7 @@
 * Add a new column `zookeeper_name` in system.replicas, to indicate on which (auxiliary) zookeeper cluster the replicated table's metadata is stored. [#48549](https://github.com/ClickHouse/ClickHouse/pull/48549) ([cangyin](https://github.com/cangyin)).
 * `IN` operator support the comparison of `Date` and `Date32`. Closes [#48736](https://github.com/ClickHouse/ClickHouse/issues/48736). [#48806](https://github.com/ClickHouse/ClickHouse/pull/48806) ([flynn](https://github.com/ucasfl)).
 * Support for erasure codes in `HDFS`, author: @M1eyu2018, @tomscut. [#48833](https://github.com/ClickHouse/ClickHouse/pull/48833) ([M1eyu](https://github.com/M1eyu2018)).
-* Implement SYSTEM DROP REPLICA from auxillary ZooKeeper clusters, may be close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)).
+* Implement SYSTEM DROP REPLICA from auxiliary ZooKeeper clusters, may be close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)).
 * Add Array data type to MongoDB. Closes [#48598](https://github.com/ClickHouse/ClickHouse/issues/48598). [#48983](https://github.com/ClickHouse/ClickHouse/pull/48983) ([Nikolay Degterinsky](https://github.com/evillique)).
 * Support storing `Interval` data types in tables. [#49085](https://github.com/ClickHouse/ClickHouse/pull/49085) ([larryluogit](https://github.com/larryluogit)).
 * Allow using `ntile` window function without explicit window frame definition: `ntile(3) OVER (ORDER BY a)`, close [#46763](https://github.com/ClickHouse/ClickHouse/issues/46763). [#49093](https://github.com/ClickHouse/ClickHouse/pull/49093) ([vdimir](https://github.com/vdimir)).
@@ -655,7 +655,7 @@
 
 #### Build/Testing/Packaging Improvement
 * New and improved `keeper-bench`. Everything can be customized from YAML/XML file: - request generator - each type of request generator can have a specific set of fields - multi requests can be generated just by doing the same under `multi` key - for each request or subrequest in multi a `weight` field can be defined to control distribution - define trees that need to be setup for a test run - hosts can be defined with all timeouts customizable and it's possible to control how many sessions to generate for each host - integers defined with `min_value` and `max_value` fields are random number generators. [#48547](https://github.com/ClickHouse/ClickHouse/pull/48547) ([Antonio Andelic](https://github.com/antonio2368)).
-* Io_uring is not supported on macos, don't choose it when running tests on local to avoid occassional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)).
+* Io_uring is not supported on macos, don't choose it when running tests on local to avoid occasional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)).
 * Support named fault injection for testing. [#49361](https://github.com/ClickHouse/ClickHouse/pull/49361) ([Han Fei](https://github.com/hanfei1991)).
 * Allow running ClickHouse in the OS where the `prctl` (process control) syscall is not available, such as AWS Lambda. [#49538](https://github.com/ClickHouse/ClickHouse/pull/49538) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
 * Fixed the issue of build conflict between contrib/isa-l and isa-l in qpl [49296](https://github.com/ClickHouse/ClickHouse/issues/49296). [#49584](https://github.com/ClickHouse/ClickHouse/pull/49584) ([jasperzhu](https://github.com/jinjunzh)).
@@ -1359,7 +1359,7 @@ Add settings input_format_tsv/csv/custom_detect_header that enable this behaviou
 * Use already written part of the query for fuzzy search (pass to the `skim` library, which is written in Rust and linked statically to ClickHouse). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)).
 * Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)).
 * Improvement for deduplication of async inserts: when users do duplicate async inserts, we should deduplicate inside the memory before we query Keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)).
-* Input/ouptut `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)).
+* Input/output `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)).
 * Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)).
 * Don't greedily parse beyond the quotes when reading UUIDs - it may lead to mistakenly successful parsing of incorrect data. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)).
 * Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)).

From b11aaecf29ddf9dc8e239bc3e6ae5505a8f64fdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <benjamin.antal@clickhouse.com>
Date: Fri, 22 Sep 2023 12:35:52 +0000
Subject: [PATCH 230/243] Make exception message more descriptive

---
 src/Access/SettingsConstraints.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp
index e784f017b8f..59874f4e9ec 100644
--- a/src/Access/SettingsConstraints.cpp
+++ b/src/Access/SettingsConstraints.cpp
@@ -340,7 +340,12 @@ bool SettingsConstraints::Checker::check(SettingChange & change,
     if (!min_value.isNull() && !max_value.isNull() && less_or_cannot_compare(max_value, min_value))
     {
         if (reaction == THROW_ON_VIOLATION)
-            throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Setting {} should not be changed", setting_name);
+            throw Exception(
+                ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
+                "The maximum ({}) value is less than the maximum ({}) value for setting {}",
+                max_value,
+                min_value,
+                setting_name);
         else
             return false;
     }

From f54c3c6a9f07e51463593749f451214687ffc8e1 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov <tavplubix@clickhouse.com>
Date: Fri, 22 Sep 2023 15:07:55 +0200
Subject: [PATCH 231/243] Update StorageReplicatedMergeTree.cpp

---
 src/Storages/StorageReplicatedMergeTree.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 6f406f610b7..721fb6a94db 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -9259,8 +9259,8 @@ std::pair<bool, NameSet> StorageReplicatedMergeTree::unlockSharedDataByID(
             /// It's not a problem if we have found the mutation parent (so we have files_not_to_remove).
             /// But in rare cases mutations parents could have been already removed (so we don't have the list of hardlinks).
 
-            /// I'm not 100% sure that parent_not_to_remove list cannot be incomplete (when it's not empty)
-            if (part_info.mutation && parent_not_to_remove.empty())
+            /// I'm not 100% sure that parent_not_to_remove list cannot be incomplete (when we've found a parent)
+            if (part_info.mutation && !has_parent)
                 part_has_no_more_locks = false;
 
             LOG_TRACE(logger, "Can't remove parent zookeeper lock {} for part {}, because children {} ({}) exists (can remove blobs: {})",

From a691dea6cf61805171f293033d0710ebbc46575b Mon Sep 17 00:00:00 2001
From: Antonio Andelic <antonio@clickhouse.com>
Date: Fri, 22 Sep 2023 10:29:11 +0000
Subject: [PATCH 232/243] Set exception for promise

---
 .../Transforms/CreatingSetsTransform.cpp      | 36 ++++++++++++++-----
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp
index afce1355f7a..b2e459ee487 100644
--- a/src/Processors/Transforms/CreatingSetsTransform.cpp
+++ b/src/Processors/Transforms/CreatingSetsTransform.cpp
@@ -8,7 +8,6 @@
 #include <Storages/IStorage.h>
 
 #include <Common/logger_useful.h>
-#include <iomanip>
 
 
 namespace DB
@@ -39,16 +38,35 @@ CreatingSetsTransform::CreatingSetsTransform(
 
 void CreatingSetsTransform::work()
 {
-    if (!is_initialized)
-        init();
-
-    if (done_with_set && done_with_table)
+    try
     {
-        finishConsume();
-        input.close();
-    }
+        if (!is_initialized)
+            init();
 
-    IAccumulatingTransform::work();
+        if (done_with_set && done_with_table)
+        {
+            finishConsume();
+            input.close();
+        }
+
+        IAccumulatingTransform::work();
+    }
+    catch (...)
+    {
+        if (promise_to_build)
+        {
+            /// set_exception can also throw
+            try
+            {
+                promise_to_build->set_exception(std::current_exception());
+            }
+            catch (...)
+            {
+                tryLogCurrentException(log, "Failed to set_exception for promise");
+            }
+        }
+        throw;
+    }
 }
 
 void CreatingSetsTransform::startSubquery()

From a7c4efb845f4479f6da2b7aee5bc434fef856a8e Mon Sep 17 00:00:00 2001
From: Robert Schulze <robert@clickhouse.com>
Date: Fri, 22 Sep 2023 13:13:55 +0000
Subject: [PATCH 233/243] Simpler

---
 .../functions/date-time-functions.md          |  5 +-
 src/Functions/DateTimeTransforms.h            |  8 +--
 .../FunctionDateOrDateTimeToSomething.h       | 22 +++---
 src/Functions/IFunctionDateOrDateTime.h       |  4 +-
 src/Functions/toDaysSinceYearZero.cpp         | 69 +------------------
 .../02874_toDaysSinceYearZero.reference       |  4 ++
 .../0_stateless/02874_toDaysSinceYearZero.sql |  8 ++-
 7 files changed, 28 insertions(+), 92 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 50c8e4057c4..0364a610404 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -732,14 +732,15 @@ Returns for a given date, the number of days passed since [1 January 0000](https
 **Syntax**
 
 ``` sql
-toDaysSinceYearZero(date)
+toDaysSinceYearZero(date[, time_zone])
 ```
 
 Aliases: `TO_DAYS`
 
-**Arguments**
 
+**Arguments**
 - `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `time_zone` — A String type const value or a expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
 
 **Returned value**
 
diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h
index ae8598fe9ea..a845f63626d 100644
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@@ -934,13 +934,9 @@ private:
 public:
     static constexpr auto name = "toDaysSinceYearZero";
 
-    static UInt32 execute(const DecimalUtils::DecimalComponents<DateTime64> & t, const DateLUTImpl & time_zone)
+    static UInt32 execute(Int64 t, const DateLUTImpl & time_zone)
     {
-        return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t.whole));
-    }
-    static UInt32 execute(Int64, const DateLUTImpl &)
-    {
-        throwDateTimeIsNotSupported(name);
+        return DAYS_BETWEEN_YEARS_0_AND_1970 + static_cast<UInt32>(time_zone.toDayNum(t));
     }
     static UInt32 execute(UInt32 d, const DateLUTImpl &)
     {
diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h
index d98b788c7d7..e5e12ec6e92 100644
--- a/src/Functions/FunctionDateOrDateTimeToSomething.h
+++ b/src/Functions/FunctionDateOrDateTimeToSomething.h
@@ -19,9 +19,10 @@ public:
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
-        this->checkArguments(arguments, (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>));
+        constexpr bool result_is_date_or_date32 = (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>);
+        this->checkArguments(arguments, result_is_date_or_date32);
 
-        /// For DateTime, if time zone is specified, attach it to type.
+        /// For DateTime results, if time zone is specified, attach it to type.
         /// If the time zone is specified but empty, throw an exception.
         if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
         {
@@ -34,6 +35,7 @@ public:
                     this->getName());
             return std::make_shared<ToDataType>(time_zone);
         }
+
         if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
         {
             Int64 scale = DataTypeDateTime64::default_scale;
@@ -42,17 +44,11 @@ public:
             auto source_scale = scale;
 
             if constexpr (std::is_same_v<ToStartOfMillisecondImpl, Transform>)
-            {
                 scale = std::max(source_scale, static_cast<Int64>(3));
-            }
             else if constexpr (std::is_same_v<ToStartOfMicrosecondImpl, Transform>)
-            {
                 scale = std::max(source_scale, static_cast<Int64>(6));
-            }
             else if constexpr (std::is_same_v<ToStartOfNanosecondImpl, Transform>)
-            {
                 scale = std::max(source_scale, static_cast<Int64>(9));
-            }
 
             return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false));
         }
@@ -63,18 +59,16 @@ public:
     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
     {
         const IDataType * from_type = arguments[0].type.get();
-        WhichDataType which(from_type);
 
-        if (which.isDate())
+        if (isDate(from_type))
             return DateTimeTransformImpl<DataTypeDate, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDate32())
+        else if (isDate32(from_type))
             return DateTimeTransformImpl<DataTypeDate32, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDateTime())
+        else if (isDateTime(from_type))
             return DateTimeTransformImpl<DataTypeDateTime, ToDataType, Transform>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDateTime64())
+        else if (isDateTime64(from_type))
         {
             const auto scale = static_cast<const DataTypeDateTime64 *>(from_type)->getScale();
-
             const TransformDateTime64<Transform> transformer(scale);
             return DateTimeTransformImpl<DataTypeDateTime64, ToDataType, decltype(transformer)>::execute(arguments, result_type, input_rows_count, transformer);
         }
diff --git a/src/Functions/IFunctionDateOrDateTime.h b/src/Functions/IFunctionDateOrDateTime.h
index c22754eecd7..762b79bfafc 100644
--- a/src/Functions/IFunctionDateOrDateTime.h
+++ b/src/Functions/IFunctionDateOrDateTime.h
@@ -110,14 +110,14 @@ protected:
     {
         if (arguments.size() == 1)
         {
-            if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
+            if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                     "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
                     arguments[0].type->getName(), getName());
         }
         else if (arguments.size() == 2)
         {
-            if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
+            if (!isDateOrDate32OrDateTimeOrDateTime64(arguments[0].type))
                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                     "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64",
                     arguments[0].type->getName(), getName());
diff --git a/src/Functions/toDaysSinceYearZero.cpp b/src/Functions/toDaysSinceYearZero.cpp
index 6688c5edcc1..f6239b2900b 100644
--- a/src/Functions/toDaysSinceYearZero.cpp
+++ b/src/Functions/toDaysSinceYearZero.cpp
@@ -1,78 +1,13 @@
-#include <Columns/ColumnsNumber.h>
-#include <DataTypes/DataTypeDate.h>
-#include <DataTypes/DataTypeDate32.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <Functions/DateTimeTransforms.h>
+#include <Functions/FunctionDateOrDateTimeToSomething.h>
 #include <Functions/FunctionFactory.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/IFunction.h>
-#include "DataTypes/IDataType.h"
-#include "Functions/TransformDateTime64.h"
 
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-}
-
-namespace
-{
-
-/** Returns number of days passed since 0000-01-01 */
-class FunctionToDaysSinceYearZero : public IFunction
-{
-    using ResultType = DataTypeUInt32;
-public:
-    static constexpr auto name = "toDaysSinceYearZero";
-    static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionToDaysSinceYearZero>(context); }
-
-    explicit FunctionToDaysSinceYearZero(ContextPtr /*context*/) { }
-
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 1; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors mandatory_args{
-            {"date", &isDateOrDate32OrDateTimeOrDateTime64<IDataType>, nullptr, "Date or Date32 or DateTime or DateTime64"}
-        };
-
-        validateFunctionArgumentTypes(*this, arguments, mandatory_args);
-
-        return std::make_shared<ResultType>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        const IDataType * from_type = arguments[0].type.get();
-        WhichDataType which(from_type);
-
-        if (which.isDate())
-            return DateTimeTransformImpl<DataTypeDate, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDate32())
-            return DateTimeTransformImpl<DataTypeDate32, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDateTime())
-            return DateTimeTransformImpl<DataTypeDateTime, ResultType, ToDaysSinceYearZeroImpl>::execute(arguments, result_type, input_rows_count);
-        else if (which.isDateTime64())
-        {
-            const auto scale = static_cast<const DataTypeDateTime64 *>(from_type)->getScale();
-            const TransformDateTime64<ToDaysSinceYearZeroImpl> transformer(scale);
-            return DateTimeTransformImpl<DataTypeDateTime64, ResultType, decltype(transformer)>::execute(arguments, result_type, input_rows_count, transformer);
-        }
-
-        throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
-            "Illegal type {} of argument of function {}",
-            arguments[0].type->getName(),
-            this->getName());
-    }
-};
-
-}
+using FunctionToDaysSinceYearZero = FunctionDateOrDateTimeToSomething<DataTypeUInt32, ToDaysSinceYearZeroImpl>;
 
 REGISTER_FUNCTION(ToDaysSinceYearZero)
 {
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
index b74d4a369a1..04c5efd81b7 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.reference
@@ -2,13 +2,17 @@ Invalid parameters
 Const argument
 719528
 739136
+739136
 693961
 739136
+739136
 719528
 739136
+739136
 693961
 739136
 739136
+739136
 \N
 Non-const argument
 739136
diff --git a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
index d10674620ec..a02591f793e 100644
--- a/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
+++ b/tests/queries/0_stateless/02874_toDaysSinceYearZero.sql
@@ -1,18 +1,24 @@
+SET session_timezone = 'Europe/Amsterdam'; -- disable time zone randomization in CI
+
 SELECT 'Invalid parameters';
 SELECT toDaysSinceYearZero(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
-SELECT toDaysSinceYearZero(toDate('2023-09-08'), toDate('2023-09-08')); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+SELECT toDaysSinceYearZero(toDate('2023-09-08'), 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT toDaysSinceYearZero('str'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT toDaysSinceYearZero(42); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 
 SELECT 'Const argument';
 SELECT toDaysSinceYearZero(toDate('1970-01-01'));
 SELECT toDaysSinceYearZero(toDate('2023-09-08'));
+SELECT toDaysSinceYearZero(toDate('2023-09-08'), 'America/Los_Angeles');
 SELECT toDaysSinceYearZero(toDate32('1900-01-01'));
 SELECT toDaysSinceYearZero(toDate32('2023-09-08'));
+SELECT toDaysSinceYearZero(toDate32('2023-09-08'), 'America/Los_Angeles');
 SELECT toDaysSinceYearZero(toDateTime('1970-01-01 00:00:00'));
 SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11'));
+SELECT toDaysSinceYearZero(toDateTime('2023-09-08 11:11:11'), 'America/Los_Angeles');
 SELECT toDaysSinceYearZero(toDateTime64('1900-01-01 00:00:00.000', 3));
 SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123', 3));
+SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123', 3), 'America/Los_Angeles');
 SELECT toDaysSinceYearZero(toDateTime64('2023-09-08 11:11:11.123123123', 9));
 SELECT toDaysSinceYearZero(NULL);
 

From 2ffa407aaba5de849968e8a7897ba62c82c8c2cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
 <antaljanosbenjamin@users.noreply.github.com>
Date: Fri, 22 Sep 2023 15:17:36 +0200
Subject: [PATCH 234/243] Update src/Access/SettingsConstraints.cpp

Co-authored-by: Antonio Andelic <antonio2368@users.noreply.github.com>
---
 src/Access/SettingsConstraints.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp
index 59874f4e9ec..db805c83e17 100644
--- a/src/Access/SettingsConstraints.cpp
+++ b/src/Access/SettingsConstraints.cpp
@@ -342,7 +342,7 @@ bool SettingsConstraints::Checker::check(SettingChange & change,
         if (reaction == THROW_ON_VIOLATION)
             throw Exception(
                 ErrorCodes::SETTING_CONSTRAINT_VIOLATION,
-                "The maximum ({}) value is less than the maximum ({}) value for setting {}",
+                "The maximum ({}) value is less than the minimum ({}) value for setting {}",
                 max_value,
                 min_value,
                 setting_name);

From 80d511093b5c126e9534cfc20b1faeda07a165f1 Mon Sep 17 00:00:00 2001
From: kothiga <kothiga2112@gmail.com>
Date: Tue, 19 Sep 2023 12:16:11 -0700
Subject: [PATCH 235/243] Provide support for BSON on BE

---
 .../Impl/BSONEachRowRowInputFormat.cpp        | 56 ++++++++++---------
 .../Impl/BSONEachRowRowOutputFormat.cpp       |  9 ++-
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
index 2972f9da743..ea8ed960595 100644
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
@@ -118,7 +118,7 @@ static UInt8 readBSONType(ReadBuffer & in)
 static size_t readBSONSize(ReadBuffer & in)
 {
     BSONSizeT size;
-    readBinary(size, in);
+    readBinaryLittleEndian(size, in);
     return size;
 }
 
@@ -131,19 +131,19 @@ static void readAndInsertInteger(ReadBuffer & in, IColumn & column, const DataTy
     if (bson_type == BSONType::INT32)
     {
         UInt32 value;
-        readBinary(value, in);
+        readBinaryLittleEndian(value, in);
         assert_cast<ColumnVector<T> &>(column).insertValue(static_cast<T>(value));
     }
     else if (bson_type == BSONType::INT64)
     {
         UInt64 value;
-        readBinary(value, in);
+        readBinaryLittleEndian(value, in);
         assert_cast<ColumnVector<T> &>(column).insertValue(static_cast<T>(value));
     }
     else if (bson_type == BSONType::BOOL)
     {
         UInt8 value;
-        readBinary(value, in);
+        readBinaryLittleEndian(value, in);
         assert_cast<ColumnVector<T> &>(column).insertValue(static_cast<T>(value));
     }
     else
@@ -160,7 +160,7 @@ static void readAndInsertIPv4(ReadBuffer & in, IColumn & column, BSONType bson_t
         throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Int32 into column with type IPv4");
 
     UInt32 value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<ColumnIPv4 &>(column).insertValue(IPv4(value));
 }
 
@@ -172,7 +172,7 @@ static void readAndInsertDouble(ReadBuffer & in, IColumn & column, const DataTyp
                         getBSONTypeName(bson_type), data_type->getName());
 
     Float64 value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<ColumnVector<T> &>(column).insertValue(static_cast<T>(value));
 }
 
@@ -184,7 +184,7 @@ static void readAndInsertSmallDecimal(ReadBuffer & in, IColumn & column, const D
                         getBSONTypeName(bson_type), data_type->getName());
 
     DecimalType value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<ColumnDecimal<DecimalType> &>(column).insertValue(value);
 }
 
@@ -194,7 +194,7 @@ static void readAndInsertDateTime64(ReadBuffer & in, IColumn & column, BSONType
         throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into DateTime64 column", getBSONTypeName(bson_type));
 
     DateTime64 value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<DataTypeDateTime64::ColumnType &>(column).insertValue(value);
 }
 
@@ -222,7 +222,7 @@ static void readAndInsertBigInteger(ReadBuffer & in, IColumn & column, const Dat
             sizeof(ValueType));
 
     ValueType value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<ColumnType &>(column).insertValue(value);
 }
 
@@ -355,7 +355,7 @@ static void readAndInsertUUID(ReadBuffer & in, IColumn & column, BSONType bson_t
             sizeof(UUID));
 
     UUID value;
-    readBinary(value, in);
+    readBinaryLittleEndian(value, in);
     assert_cast<ColumnUUID &>(column).insertValue(value);
 }
 
@@ -371,7 +371,7 @@ void BSONEachRowRowInputFormat::readArray(IColumn & column, const DataTypePtr &
 
     size_t document_start = in->count();
     BSONSizeT document_size;
-    readBinary(document_size, *in);
+    readBinaryLittleEndian(document_size, *in);
     if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
         throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size);
 
@@ -401,7 +401,7 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr &
 
     size_t document_start = in->count();
     BSONSizeT document_size;
-    readBinary(document_size, *in);
+    readBinaryLittleEndian(document_size, *in);
     if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
         throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size);
 
@@ -462,7 +462,7 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da
 
     size_t document_start = in->count();
     BSONSizeT document_size;
-    readBinary(document_size, *in);
+    readBinaryLittleEndian(document_size, *in);
     if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
         throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size);
 
@@ -696,7 +696,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
         case BSONType::STRING:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             in.ignore(size);
             break;
         }
@@ -704,7 +704,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
         case BSONType::ARRAY:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             if (size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
                 throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", size);
             in.ignore(size - sizeof(size));
@@ -713,7 +713,7 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
         case BSONType::BINARY:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             in.ignore(size + 1);
             break;
         }
@@ -738,14 +738,14 @@ static void skipBSONField(ReadBuffer & in, BSONType type)
         case BSONType::DB_POINTER:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             in.ignore(size + BSON_DB_POINTER_SIZE);
             break;
         }
         case BSONType::JAVA_SCRIPT_CODE_W_SCOPE:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             if (size < sizeof(BSONSizeT))
                 throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid java code_w_scope size: {}", size);
             in.ignore(size - sizeof(size));
@@ -787,7 +787,7 @@ bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi
     size_t key_index = 0;
 
     current_document_start = in->count();
-    readBinary(current_document_size, *in);
+    readBinaryLittleEndian(current_document_size, *in);
     if (current_document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
         throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", current_document_size);
 
@@ -844,7 +844,7 @@ size_t BSONEachRowRowInputFormat::countRows(size_t max_block_size)
     BSONSizeT document_size;
     while (!in->eof() && num_rows < max_block_size)
     {
-        readBinary(document_size, *in);
+        readBinaryLittleEndian(document_size, *in);
         if (document_size < sizeof(BSONSizeT) + sizeof(BSON_DOCUMENT_END))
             throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid document size: {}", document_size);
         in->ignore(document_size - sizeof(BSONSizeT));
@@ -893,7 +893,7 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
         case BSONType::STRING:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             in.ignore(size);
             return std::make_shared<DataTypeString>();
         }
@@ -947,7 +947,7 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo
         case BSONType::BINARY:
         {
             BSONSizeT size;
-            readBinary(size, in);
+            readBinaryLittleEndian(size, in);
             auto subtype = getBSONBinarySubtype(readBSONType(in));
             in.ignore(size);
             switch (subtype)
@@ -982,7 +982,7 @@ NamesAndTypesList BSONEachRowSchemaReader::getDataTypesFromBSONDocument(bool all
 {
     size_t document_start = in.count();
     BSONSizeT document_size;
-    readBinary(document_size, in);
+    readBinaryLittleEndian(document_size, in);
     NamesAndTypesList names_and_types;
     while (in.count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size)
     {
@@ -1028,7 +1028,7 @@ fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t
     while (!in.eof() && memory.size() < min_bytes && number_of_rows < max_rows)
     {
         BSONSizeT document_size;
-        readBinary(document_size, in);
+        readBinaryLittleEndian(document_size, in);
 
         if (document_size < sizeof(document_size))
             throw ParsingException(ErrorCodes::INCORRECT_DATA, "Size of BSON document is invalid");
@@ -1045,7 +1045,13 @@ fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t
 
         size_t old_size = memory.size();
         memory.resize(old_size + document_size);
-        unalignedStore<BSONSizeT>(memory.data() + old_size, document_size);
+
+        // Ensure the document size we write to the memory is byte arranged for LE.
+        BSONSizeT size_out = document_size;
+        if constexpr(std::endian::native == std::endian::big)
+            size_out = std::byteswap(size_out);
+        unalignedStore<BSONSizeT>(memory.data() + old_size, size_out);
+
         in.readStrict(memory.data() + old_size + sizeof(document_size), document_size - sizeof(document_size));
         ++number_of_rows;
     }
diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp
index 2bb5410781c..b7f415ff449 100644
--- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp
@@ -58,7 +58,7 @@ static void writeBSONSize(size_t size, WriteBuffer & buf)
     if (size > MAX_BSON_SIZE)
         throw Exception(ErrorCodes::INCORRECT_DATA, "Too large document/value size: {}. Maximum allowed size: {}.", size, MAX_BSON_SIZE);
 
-    writePODBinary<BSONSizeT>(BSONSizeT(size), buf);
+    writeBinaryLittleEndian(BSONSizeT(size), buf);
 }
 
 template <typename Type>
@@ -79,7 +79,7 @@ template <typename ColumnType, typename ValueType>
 static void writeBSONNumber(BSONType type, const IColumn & column, size_t row_num, const String & name, WriteBuffer & buf)
 {
     writeBSONTypeAndKeyName(type, name, buf);
-    writePODBinary<ValueType>(assert_cast<const ColumnType &>(column).getElement(row_num), buf);
+    writeBinaryLittleEndian(ValueType(assert_cast<const ColumnType &>(column).getElement(row_num)), buf);
 }
 
 template <typename StringColumnType>
@@ -109,8 +109,7 @@ static void writeBSONBigInteger(const IColumn & column, size_t row_num, const St
     writeBSONTypeAndKeyName(BSONType::BINARY, name, buf);
     writeBSONSize(sizeof(typename ColumnType::ValueType), buf);
     writeBSONType(BSONBinarySubtype::BINARY, buf);
-    auto data = assert_cast<const ColumnType &>(column).getDataAt(row_num);
-    buf.write(data.data, data.size);
+    writeBinaryLittleEndian(assert_cast<const ColumnType &>(column).getElement(row_num), buf);
 }
 
 size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name, const String & path, std::unordered_map<String, size_t> & nested_document_sizes)
@@ -407,7 +406,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
             writeBSONTypeAndKeyName(BSONType::BINARY, name, out);
             writeBSONSize(sizeof(UUID), out);
             writeBSONType(BSONBinarySubtype::UUID, out);
-            writeBinary(assert_cast<const ColumnUUID &>(column).getElement(row_num), out);
+            writeBinaryLittleEndian(assert_cast<const ColumnUUID &>(column).getElement(row_num), out);
             break;
         }
         case TypeIndex::LowCardinality:

From 13eee0e9500bae91c9d60f0a76294767ca874aff Mon Sep 17 00:00:00 2001
From: pufit <pufit@clickhouse.com>
Date: Fri, 22 Sep 2023 14:13:20 -0400
Subject: [PATCH 236/243] Set a minimum limit of `num_streams` in StorageS3

---
 src/Storages/StorageS3.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index 42c62a3e8f8..79a0721cf9a 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -1078,7 +1078,11 @@ Pipe StorageS3::read(
         query_configuration, distributed_processing, local_context, query_info.query, virtual_columns, nullptr, local_context->getFileProgressCallback());
 
     size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount();
-    num_streams = std::min(num_streams, estimated_keys_count);
+    if (estimated_keys_count > 1)
+        num_streams = std::min(num_streams, estimated_keys_count);
+    else
+        /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case.
+        num_streams = 1;
 
     auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
     bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())

From 3e57b007a88105f5570d1989bf2b43a4b53eb924 Mon Sep 17 00:00:00 2001
From: kothiga <kothiga2112@gmail.com>
Date: Fri, 22 Sep 2023 12:25:17 -0700
Subject: [PATCH 237/243] Use LE version of unalignedStore.

---
 src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
index ea8ed960595..b38aaa426fd 100644
--- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp
@@ -1045,13 +1045,7 @@ fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t
 
         size_t old_size = memory.size();
         memory.resize(old_size + document_size);
-
-        // Ensure the document size we write to the memory is byte arranged for LE.
-        BSONSizeT size_out = document_size;
-        if constexpr(std::endian::native == std::endian::big)
-            size_out = std::byteswap(size_out);
-        unalignedStore<BSONSizeT>(memory.data() + old_size, size_out);
-
+        unalignedStoreLittleEndian<BSONSizeT>(memory.data() + old_size, document_size);
         in.readStrict(memory.data() + old_size + sizeof(document_size), document_size - sizeof(document_size));
         ++number_of_rows;
     }

From 6db8d2aa921b9b1c659d1010467df57d4463d707 Mon Sep 17 00:00:00 2001
From: Suzy Wang <suzy.wang@ibm.com>
Date: Fri, 22 Sep 2023 16:29:47 -0300
Subject: [PATCH 238/243] ipv4 read fix

---
 src/Common/HashTable/Hash.h                             | 2 +-
 src/DataTypes/Serializations/SerializationIPv4andIPv6.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h
index c62be4fe2b8..90209891704 100644
--- a/src/Common/HashTable/Hash.h
+++ b/src/Common/HashTable/Hash.h
@@ -310,7 +310,7 @@ requires (sizeof(T) <= sizeof(UInt64))
 inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
 {
     DB::UInt64 out {0};
-    std::memcpy(&out, &key, sizeof(T));
+    std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
     return intHashCRC32(out, updated_value);
 }
 
diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h
index 61464962f1c..1289f82d1c4 100644
--- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h
+++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h
@@ -86,12 +86,12 @@ public:
     void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings &) const override
     {
         IPv x = field.get<IPv>();
-        writeBinary(x, ostr);
+        writeBinaryLittleEndian(x, ostr);
     }
     void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings &) const override
     {
         IPv x;
-        readBinary(x.toUnderType(), istr);
+        readBinaryLittleEndian(x.toUnderType(), istr);
         field = NearestFieldType<IPv>(x);
     }
     void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override

From 51bc09d3477b6a01467b524b8e3be40a1be4e30a Mon Sep 17 00:00:00 2001
From: Suzy Wang <suzy.wang@ibm.com>
Date: Fri, 22 Sep 2023 16:41:49 -0300
Subject: [PATCH 239/243] big endian

---
 src/Common/HashTable/Hash.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h
index 90209891704..6952e4bb1a5 100644
--- a/src/Common/HashTable/Hash.h
+++ b/src/Common/HashTable/Hash.h
@@ -310,7 +310,10 @@ requires (sizeof(T) <= sizeof(UInt64))
 inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
 {
     DB::UInt64 out {0};
-    std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
+    if constexpr (std::endian::native == std::endian::little)
+        std::memcpy(&out, &key, sizeof(T));
+    else
+        std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
     return intHashCRC32(out, updated_value);
 }
 

From 1326bffe606b92e5501db0d62668234005b7790b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 23 Sep 2023 04:14:03 +0200
Subject: [PATCH 240/243] Allow LIKE over binary data

---
 src/Common/OptimizedRegularExpression.cpp     |  8 ++++++
 .../0_stateless/02886_binary_like.reference   | 24 +++++++++++++++++
 .../queries/0_stateless/02886_binary_like.sql | 26 +++++++++++++++++++
 3 files changed, 58 insertions(+)
 create mode 100644 tests/queries/0_stateless/02886_binary_like.reference
 create mode 100644 tests/queries/0_stateless/02886_binary_like.sql

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 3501a355c19..d64b26a28a3 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -496,6 +496,14 @@ OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regex
             regexp_options.set_dot_nl(true);
 
         re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
+
+        /// Fallback to latin1 to allow matching binary data.
+        if (!re2->ok() && re2->error_code() == re2::RE2::ErrorCode::ErrorBadUTF8)
+        {
+            regexp_options.set_encoding(re2::RE2::Options::EncodingLatin1);
+            re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
+        }
+
         if (!re2->ok())
         {
             throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
diff --git a/tests/queries/0_stateless/02886_binary_like.reference b/tests/queries/0_stateless/02886_binary_like.reference
new file mode 100644
index 00000000000..c23ceaf9784
--- /dev/null
+++ b/tests/queries/0_stateless/02886_binary_like.reference
@@ -0,0 +1,24 @@
+1
+1
+1
+1
+1
+0
+0
+1
+1
+1
+1
+1
+1
+0
+0
+1
+1
+1
+1
+1
+1
+0
+0
+1
diff --git a/tests/queries/0_stateless/02886_binary_like.sql b/tests/queries/0_stateless/02886_binary_like.sql
new file mode 100644
index 00000000000..ba11f1fc060
--- /dev/null
+++ b/tests/queries/0_stateless/02886_binary_like.sql
@@ -0,0 +1,26 @@
+SELECT 'aяb' LIKE 'a_b';
+SELECT 'a\0b' LIKE 'a_b';
+SELECT 'a\0b' LIKE 'a\0b';
+SELECT 'a\0b' LIKE 'a%\0b';
+SELECT 'a\xFFb' LIKE 'a%\xFFb';
+SELECT 'a\xFFb' LIKE 'a%\xFF\xFEb';
+SELECT 'a\xFFb' LIKE '%a\xFF\xFEb';
+SELECT 'a\xFF\xFEb' LIKE '%a\xFF\xFEb';
+
+SELECT materialize('aяb') LIKE 'a_b';
+SELECT materialize('a\0b') LIKE 'a_b';
+SELECT materialize('a\0b') LIKE 'a\0b';
+SELECT materialize('a\0b') LIKE 'a%\0b';
+SELECT materialize('a\xFFb') LIKE 'a%\xFFb';
+SELECT materialize('a\xFFb') LIKE 'a%\xFF\xFEb';
+SELECT materialize('a\xFFb') LIKE '%a\xFF\xFEb';
+SELECT materialize('a\xFF\xFEb') LIKE '%a\xFF\xFEb';
+
+SELECT materialize('aяb') LIKE materialize('a_b');
+SELECT materialize('a\0b') LIKE materialize('a_b');
+SELECT materialize('a\0b') LIKE materialize('a\0b');
+SELECT materialize('a\0b') LIKE materialize('a%\0b');
+SELECT materialize('a\xFFb') LIKE materialize('a%\xFFb');
+SELECT materialize('a\xFFb') LIKE materialize('a%\xFF\xFEb');
+SELECT materialize('a\xFFb') LIKE materialize('%a\xFF\xFEb');
+SELECT materialize('a\xFF\xFEb') LIKE materialize('%a\xFF\xFEb');

From 81804ce9ba1f419e2a8f1be70e9d68618038858f Mon Sep 17 00:00:00 2001
From: Amos Bird <amosbird@gmail.com>
Date: Sat, 23 Sep 2023 17:50:12 +0800
Subject: [PATCH 241/243] Rebuild minmax_count_projection when partition key
 gets modified

---
 src/Storages/AlterCommands.cpp                     | 14 ++++++++++++++
 ...count_projection_modify_partition_key.reference |  3 +++
 ...inmax_count_projection_modify_partition_key.sql | 13 +++++++++++++
 3 files changed, 30 insertions(+)
 create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.reference
 create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.sql

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index da46cb4d7fe..1f69af0b6ce 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -11,6 +11,7 @@
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/addTypeConversionToAST.h>
 #include <Interpreters/ExpressionAnalyzer.h>
+#include <Interpreters/FunctionNameNormalizer.h>
 #include <Interpreters/TreeRewriter.h>
 #include <Interpreters/RenameColumnVisitor.h>
 #include <Interpreters/GinFilter.h>
@@ -937,8 +938,21 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context
 
     /// And in partition key expression
     if (metadata_copy.partition_key.definition_ast != nullptr)
+    {
         metadata_copy.partition_key.recalculateWithNewAST(metadata_copy.partition_key.definition_ast, metadata_copy.columns, context);
 
+        /// If partition key expression is changed, we also need to rebuild minmax_count_projection
+        if (!blocksHaveEqualStructure(metadata_copy.partition_key.sample_block, metadata.partition_key.sample_block))
+        {
+            auto minmax_columns = metadata_copy.getColumnsRequiredForPartitionKey();
+            auto partition_key = metadata_copy.partition_key.expression_list_ast->clone();
+            FunctionNameNormalizer().visit(partition_key.get());
+            auto primary_key_asts = metadata_copy.primary_key.expression_list_ast->children;
+            metadata_copy.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection(
+                metadata_copy.columns, partition_key, minmax_columns, primary_key_asts, context));
+        }
+    }
+
     // /// And in sample key expression
     if (metadata_copy.sampling_key.definition_ast != nullptr)
         metadata_copy.sampling_key.recalculateWithNewAST(metadata_copy.sampling_key.definition_ast, metadata_copy.columns, context);
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.reference b/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.reference
new file mode 100644
index 00000000000..60f9fb08918
--- /dev/null
+++ b/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.reference
@@ -0,0 +1,3 @@
+x	1
+x	1
+y	1
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.sql b/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.sql
new file mode 100644
index 00000000000..5297fb3e194
--- /dev/null
+++ b/tests/queries/0_stateless/01710_minmax_count_projection_modify_partition_key.sql
@@ -0,0 +1,13 @@
+DROP TABLE IF EXISTS test;
+
+CREATE TABLE test (type Enum('x'), s String) ENGINE = MergeTree ORDER BY s PARTITION BY type;
+INSERT INTO test VALUES ('x', 'Hello');
+
+SELECT type, count() FROM test GROUP BY type ORDER BY type;
+
+ALTER TABLE test MODIFY COLUMN type Enum('x', 'y');
+INSERT INTO test VALUES ('y', 'World');
+
+SELECT type, count() FROM test GROUP BY type ORDER BY type;
+
+DROP TABLE test;

From cf080677bfbf7a84f3e4f17e68d01557f9daaa4e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sat, 23 Sep 2023 22:22:27 +0300
Subject: [PATCH 242/243] Simplify code

---
 src/Analyzer/Passes/AnyFunctionPass.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/Analyzer/Passes/AnyFunctionPass.cpp b/src/Analyzer/Passes/AnyFunctionPass.cpp
index 5fa709e71cf..75f12bc7d46 100644
--- a/src/Analyzer/Passes/AnyFunctionPass.cpp
+++ b/src/Analyzer/Passes/AnyFunctionPass.cpp
@@ -24,20 +24,19 @@ private:
             if (argument->as<LambdaNode>())
                 return false;
 
-            /// Function arrayJoin is special and should be skipped (think about it as a
-            /// an aggregate function), otherwise wrong result will be produced.
-            /// For example:
-            ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
-            ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
-            ///     │      0 │ []                                 │
-            ///     │      0 │ []                                 │
-            ///     └────────┴────────────────────────────────────┘
             if (const auto * inside_function = argument->as<FunctionNode>())
+            {
+                /// Function arrayJoin is special and should be skipped (think about it as
+                /// an aggregate function), otherwise wrong result will be produced.
+                /// For example:
+                ///     SELECT *, any(arrayJoin([[], []])) FROM numbers(1) GROUP BY number
+                ///     ┌─number─┬─arrayJoin(array(array(), array()))─┐
+                ///     │      0 │ []                                 │
+                ///     │      0 │ []                                 │
+                ///     └────────┴────────────────────────────────────┘
                 if (inside_function->getFunctionName() == "arrayJoin")
                     return false;
 
-            if (const auto * inside_function = argument->as<FunctionNode>())
-            {
                 if (!canRewrite(inside_function))
                     return false;
             }

From b0d76b0028a96f75f85d05587454ff058ede2923 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Sun, 24 Sep 2023 01:05:27 +0200
Subject: [PATCH 243/243] Remove test `01051_system_stack_trace`

---
 .../01051_system_stack_trace.reference        | 18 -----------------
 .../0_stateless/01051_system_stack_trace.sql  | 20 -------------------
 2 files changed, 38 deletions(-)
 delete mode 100644 tests/queries/0_stateless/01051_system_stack_trace.reference
 delete mode 100644 tests/queries/0_stateless/01051_system_stack_trace.sql

diff --git a/tests/queries/0_stateless/01051_system_stack_trace.reference b/tests/queries/0_stateless/01051_system_stack_trace.reference
deleted file mode 100644
index 6ef82c703e9..00000000000
--- a/tests/queries/0_stateless/01051_system_stack_trace.reference
+++ /dev/null
@@ -1,18 +0,0 @@
--- { echo }
-SELECT count() > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler';
-1
--- opimization for not reading /proc/self/task/{}/comm and avoid sending signal
-SELECT countIf(thread_id > 0) > 0 FROM system.stack_trace;
-1
--- optimization for trace
-SELECT count(trace) > 0 FROM system.stack_trace WHERE length(trace) > 0 LIMIT 1;
-1
--- optimization for query_id
-SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler' LIMIT 1;
-1
--- optimization for thread_name
-SELECT length(thread_name) > 0 FROM system.stack_trace WHERE thread_name != '' LIMIT 1;
-1
--- enough rows (optimizations works "correctly")
-SELECT count() > 100 FROM system.stack_trace;
-1
diff --git a/tests/queries/0_stateless/01051_system_stack_trace.sql b/tests/queries/0_stateless/01051_system_stack_trace.sql
deleted file mode 100644
index b9b08f94221..00000000000
--- a/tests/queries/0_stateless/01051_system_stack_trace.sql
+++ /dev/null
@@ -1,20 +0,0 @@
--- Tags: no-parallel
--- Tag no-parallel: to decrease failure probability of collecting stack traces
-
--- NOTE: It is OK to have bigger timeout here since:
--- a) this test is marked as no-parallel
--- b) there is a filter by thread_name, so it will send signals only to the threads with the name TCPHandler
-SET storage_system_stack_trace_pipe_read_timeout_ms = 5000;
-
--- { echo }
-SELECT count() > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler';
--- opimization for not reading /proc/self/task/{}/comm and avoid sending signal
-SELECT countIf(thread_id > 0) > 0 FROM system.stack_trace;
--- optimization for trace
-SELECT count(trace) > 0 FROM system.stack_trace WHERE length(trace) > 0 LIMIT 1;
--- optimization for query_id
-SELECT length(query_id) > 0 FROM system.stack_trace WHERE query_id != '' AND thread_name = 'TCPHandler' LIMIT 1;
--- optimization for thread_name
-SELECT length(thread_name) > 0 FROM system.stack_trace WHERE thread_name != '' LIMIT 1;
--- enough rows (optimizations works "correctly")
-SELECT count() > 100 FROM system.stack_trace;