From 3e50ccdb452ca74d2477d2f9a0c090dca211ec16 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 24 Mar 2023 00:27:04 +0000
Subject: [PATCH 001/277] fixed bug with names mapping, better structure propagation

---
 src/Interpreters/Context.cpp | 69 +++++++++++++-----------------------
 1 file changed, 24 insertions(+), 45 deletions(-)

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 4e882f3ab5b..c6006af5ee2 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1394,60 +1394,39 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
     }

     if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
     {
-        const auto & structure_hint = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
-        bool use_columns_from_insert_query = true;
+        const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
+        auto table_structure = table_function_ptr->getActualTableStructure(getQueryContext());

-        /// use_structure_from_insertion_table_in_table_functions=2 means `auto`
-        if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2)
+        /// Insert table matches columns against SELECT expression by position, so we want to map
+        /// insert table columns to table function columns through names from SELECT expression.
+
+        auto insert_column = insert_structure.begin();
+        for (const auto & expression : select_query_hint->select()->as<ASTExpressionList>()->children)
         {
-            const auto * expression_list = select_query_hint->select()->as<ASTExpressionList>();
-            std::unordered_set<String> virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
-            Names columns_names;
-            bool have_asterisk = false;
-            /// First, check if we have only identifiers, asterisk and literals in select expression,
-            /// and if no, we cannot use the structure from insertion table.
-            for (const auto & expression : expression_list->children)
+            if (auto * identifier = expression->as<ASTIdentifier>())
             {
-                if (auto * identifier = expression->as<ASTIdentifier>())
+                if (table_structure.hasPhysical(identifier->name()))
+                    table_structure.modify(identifier->name(), [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
+                ++insert_column;
+            }
+            else if (expression->as<ASTAsterisk>())
+            {
+                for (const auto & column : table_structure)
                 {
-                    columns_names.push_back(identifier->name());
-                }
-                else if (expression->as<ASTAsterisk>())
-                {
-                    have_asterisk = true;
-                }
-                else if (!expression->as<ASTLiteral>())
-                {
-                    use_columns_from_insert_query = false;
-                    break;
+                    table_structure.modify(column.name, [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
+                    ++insert_column;
+                    if (insert_column == insert_structure.end())
+                        break;
                 }
             }
+            else
+                ++insert_column;

-            /// Check that all identifiers are column names from insertion table and not virtual column names from storage.
-            for (const auto & column_name : columns_names)
-            {
-                if (!structure_hint.has(column_name) || virtual_column_names.contains(column_name))
-                {
-                    use_columns_from_insert_query = false;
-                    break;
-                }
-            }
-
-            /// If we don't have asterisk but only subset of columns, we should use
-            /// structure from insertion table only in case when table function
-            /// supports reading subset of columns from data.
-            if (use_columns_from_insert_query && !have_asterisk && !columns_names.empty())
-            {
-                /// For input function we should check if input format supports reading subset of columns.
-                if (table_function_ptr->getName() == "input")
-                    use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat());
-                else
-                    use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
-            }
+            if (insert_column == insert_structure.end())
+                break;
         }

-        if (use_columns_from_insert_query)
-            table_function_ptr->setStructureHint(structure_hint);
+        table_function_ptr->setStructureHint(table_structure);
     }

     res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());

From 83ca006fcbfeb2390b4a3437659a0c9227d3fdbc Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 24 Mar 2023 13:16:54 +0000
Subject: [PATCH 002/277] same fix for analyzer

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp | 55 ++++++++++++++---------
 1 file changed, 35 insertions(+), 20 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index f5f577a20ab..eb0aed558b5 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -6097,25 +6097,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
     if (!nested_table_function)
         expressions_visitor.visit(table_function_node_typed.getArgumentsNode());

-    const auto & table_function_factory = TableFunctionFactory::instance();
-    const auto & table_function_name = table_function_node_typed.getTableFunctionName();
-
     auto & scope_context = scope.context;
-
-    TableFunctionPtr table_function_ptr = table_function_factory.tryGet(table_function_name, scope_context);
-    if (!table_function_ptr)
-    {
-        auto hints = TableFunctionFactory::instance().getHints(table_function_name);
-        if (!hints.empty())
-            throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
-                "Unknown table function {}. Maybe you meant: {}",
-                table_function_name,
-                DB::toString(hints));
-        else
-            throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
-                "Unknown table function {}",
-                table_function_name);
-    }
+    TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_function_node_typed.toAST(), scope_context);

     if (!nested_table_function &&
         scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions &&
@@ -6125,9 +6109,40 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
         const auto & insertion_table = scope_context->getInsertionTable();
         if (!insertion_table.empty())
         {
-            auto insertion_table_storage = DatabaseCatalog::instance().getTable(insertion_table, scope_context);
-            const auto & structure_hint = insertion_table_storage->getInMemoryMetadataPtr()->columns;
-            table_function_ptr->setStructureHint(structure_hint);
+            auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
+            const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
+            auto table_structure = table_function_ptr->getActualTableStructure(scope_context);
+
+            /// Insert table matches columns against SELECT expression by position, so we want to map
+            /// insert table columns to table function columns through names from SELECT expression.
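            /// [Editor's illustration, not part of the patch] A hypothetical example of this
            /// positional mapping: given a table `t(x UInt64, s String)` and the query
            ///     INSERT INTO t SELECT a, b FROM file('data.tsv')
            /// the INSERT pairs `a` with `x` and `b` with `s` by position, so the loop below
            /// would propagate UInt64 onto the source column named `a` and String onto `b`.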
+
+            auto insert_column = insert_structure.begin();
+            for (const auto & expression : expression_list)
+            {
+                if (auto * identifier_node = expression->as<IdentifierNode>())
+                {
+                    if (table_structure.hasPhysical(identifier_node->getIdentifier().getFullName()))
+                        table_structure.modify(identifier_node->getIdentifier().getFullName(), [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
+                    ++insert_column;
+                }
+                else if (auto * matcher_node = expression->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
+                {
+                    for (const auto & column : table_structure)
+                    {
+                        table_structure.modify(column.name, [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
+                        ++insert_column;
+                        if (insert_column == insert_structure.end())
+                            break;
+                    }
+                }
+                else
+                    ++insert_column;
+
+                if (insert_column == insert_structure.end())
+                    break;
+            }
+
+            table_function_ptr->setStructureHint(table_structure);
         }
     }

From e64500e72b3e8537587b6a3c6b980b54bc392ebb Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 24 Mar 2023 13:34:11 +0000
Subject: [PATCH 003/277] Automatic style fix

---
 docker/test/performance-comparison/perf.py | 2 ++
 docker/test/performance-comparison/report.py | 1 -
 tests/ci/clickhouse_helper.py | 1 -
 tests/ci/docker_images_check.py | 1 -
 tests/ci/get_previous_release_tag.py | 1 -
 tests/ci/report.py | 2 +-
 tests/integration/helpers/cluster.py | 3 +--
 tests/integration/helpers/network.py | 2 --
 .../pytest_xdist_logging_to_separate_files.py | 1 +
 .../test_detach_part_wrong_partition_id.py | 1 -
 .../test_cluster_copier/test_three_nodes.py | 1 -
 .../test_cluster_copier/test_two_nodes.py | 1 -
 tests/integration/test_composable_protocols/test.py | 1 -
 .../test_create_query_constraints/test.py | 2 --
 .../common.py | 1 -
 tests/integration/test_disks_app_func/test.py | 1 -
 .../test_distributed_ddl_parallel/test.py | 1 +
 tests/integration/test_fetch_memory_usage/test.py | 1 -
 .../scripts/stress_test.py | 1 -
 tests/integration/test_jbod_balancer/test.py | 1 -
 .../test_keeper_and_access_storage/test.py | 1 +
 tests/integration/test_keeper_back_to_back/test.py | 2 +-
 tests/integration/test_keeper_persistent_log/test.py | 1 -
 .../test_keeper_zookeeper_converter/test.py | 1 -
 tests/integration/test_merge_tree_load_parts/test.py | 6 +++---
 .../s3_endpoint/endpoint.py | 1 -
 .../test_merge_tree_settings_constraints/test.py | 1 -
 .../test_old_parts_finally_removed/test.py | 1 -
 tests/integration/test_partition/test.py | 4 +++-
 tests/integration/test_password_constraints/test.py | 1 -
 tests/integration/test_read_only_table/test.py | 1 -
 .../test_reload_auxiliary_zookeepers/test.py | 1 -
 .../s3_endpoint/endpoint.py | 1 +
 tests/integration/test_s3_with_proxy/test.py | 1 +
 .../integration/test_ssl_cert_authentication/test.py | 1 -
 tests/integration/test_storage_kafka/kafka_pb2.py | 1 -
 .../test_storage_kafka/message_with_repeated_pb2.py | 1 -
 tests/integration/test_storage_kafka/social_pb2.py | 1 -
 tests/integration/test_storage_kafka/test.py | 12 ++----------
 tests/integration/test_storage_nats/nats_pb2.py | 1 -
 .../test_storage_postgresql_replica/test.py | 1 -
 .../test_storage_rabbitmq/rabbitmq_pb2.py | 1 -
 tests/integration/test_storage_rabbitmq/test.py | 3 ---
 tests/integration/test_storage_s3/test.py | 1 +
 .../test_storage_s3/test_invalid_env_credentials.py | 1 +
 tests/integration/test_system_merges/test.py | 1 -
 tests/integration/test_ttl_move/test.py | 2 +-
 tests/integration/test_zero_copy_fetch/test.py | 1 -
utils/changelog-simple/format-changelog.py | 1 + utils/keeper-overload/keeper-overload.py | 2 +- 50 files changed, 23 insertions(+), 57 deletions(-) diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 65bf49c2914..7a4e6386d0d 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -26,6 +26,7 @@ logging.basicConfig( total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + # Thread executor that does not hides exception that happens during function # execution, and rethrows it after join() class SafeThread(Thread): @@ -158,6 +159,7 @@ for e in subst_elems: available_parameters[name] = values + # Takes parallel lists of templates, substitutes them with all combos of # parameters. The set of parameters is determined based on the first list. # Note: keep the order of queries -- sometimes we have DROP IF EXISTS diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 782cf29863c..214f2d550b4 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -670,7 +670,6 @@ if args.report == "main": ) elif args.report == "all-queries": - print((header_template.format())) add_tested_commits() diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d60a9e6afd1..64b64896f66 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -141,7 +141,6 @@ def prepare_tests_results_for_clickhouse( report_url: str, check_name: str, ) -> List[dict]: - pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" head_ref = "master" diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 192d216614e..f2b1105b3b0 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -96,7 +96,6 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( pr_info: PRInfo, images_dict: ImagesDict ) -> Set[DockerImage]: - if not images_dict: return set() diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index c6fe6cd5fb5..c2d279f7fec 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -51,7 +51,6 @@ def find_previous_release( for release in releases: if release.version < server_version: - # Check if the artifact exists on GitHub. # It can be not true for a short period of time # after creating a tag for a new release before uploading the packages. 
diff --git a/tests/ci/report.py b/tests/ci/report.py index 947fb33d905..ddee035d26f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -473,7 +473,7 @@ def create_build_html_report( commit_url: str, ) -> str: rows = "" - for (build_result, build_log_url, artifact_urls) in zip( + for build_result, build_log_url, artifact_urls in zip( build_results, build_logs_urls, artifact_urls_list ): row = "" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index dc5ada81995..a9a996e0a5f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -63,6 +63,7 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" + # to create docker-compose env file def _create_env_file(path, variables): logging.debug(f"Env {variables} stored in {path}") @@ -1454,7 +1455,6 @@ class ClickHouseCluster: config_root_name="clickhouse", extra_configs=[], ) -> "ClickHouseInstance": - """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -3089,7 +3089,6 @@ class ClickHouseInstance: config_root_name="clickhouse", extra_configs=[], ): - self.name = name self.base_cmd = cluster.base_cmd self.docker_id = cluster.get_instance_docker_id(self.name) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index e408c9beec1..471aa2bdc2e 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -216,7 +216,6 @@ class _NetworkManager: container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): - self.container_expire_timeout = container_expire_timeout self.container_exit_timeout = container_exit_timeout @@ -232,7 +231,6 @@ class _NetworkManager: def _ensure_container(self): if self._container is None or self._container_expire_time <= time.time(): - for i in range(5): if self._container is not None: try: diff --git a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py index d424ad58fa4..370aa23a014 100644 --- a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py +++ b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py @@ -1,6 +1,7 @@ import logging import os.path + # Makes the parallel workers of pytest-xdist to log to separate files. # Without this function all workers will log to the same log file # and mix everything together making it much more difficult for troubleshooting. diff --git a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py index 02fccfae4e5..a6f7a8653da 100644 --- a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py +++ b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py @@ -24,7 +24,6 @@ def start_cluster(): def test_detach_part_wrong_partition_id(start_cluster): - # Here we create table with partition by UUID. 
node_21_6.query( "create table tab (id UUID, value UInt32) engine = MergeTree PARTITION BY (id) order by tuple()" diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py index 31d6c0448f4..e7d07757adb 100644 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ b/tests/integration/test_cluster_copier/test_three_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first", "second", "third"]: cluster.add_instance( name, diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 10ab7d03b00..2b6fcf6cac2 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first_of_two", "second_of_two"]: instance = cluster.add_instance( name, diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index bc87fea5296..df74cfffa54 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -63,7 +63,6 @@ def netcat(hostname, port, content): def test_connections(): - client = Client(server.ip_address, 9000, command=cluster.client_bin_path) assert client.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_create_query_constraints/test.py b/tests/integration/test_create_query_constraints/test.py index 8df043fd24b..33c41b4f161 100644 --- a/tests/integration/test_create_query_constraints/test.py +++ b/tests/integration/test_create_query_constraints/test.py @@ -25,7 +25,6 @@ def start_cluster(): def test_create_query_const_constraints(): - instance.query("CREATE USER u_const SETTINGS max_threads = 1 CONST") instance.query("GRANT ALL ON *.* TO u_const") @@ -57,7 +56,6 @@ def test_create_query_const_constraints(): def test_create_query_minmax_constraints(): - instance.query("CREATE USER u_minmax SETTINGS max_threads = 4 MIN 2 MAX 6") instance.query("GRANT ALL ON *.* TO u_minmax") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py index b38e81b0227..01addae2542 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py @@ -348,7 +348,6 @@ class RangedLayoutTester(BaseLayoutTester): self.layouts = LAYOUTS_RANGED def execute(self, layout_name, node): - if layout_name not in self.layout_to_dictionary: raise RuntimeError("Source doesn't support layout: {}".format(layout_name)) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 027ef8feed0..2428c53854e 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -7,7 +7,6 @@ import pytest def started_cluster(): global cluster try: - cluster = ClickHouseCluster(__file__) cluster.add_instance( "disks_app_test", main_configs=["config.xml"], with_minio=True diff --git a/tests/integration/test_distributed_ddl_parallel/test.py b/tests/integration/test_distributed_ddl_parallel/test.py index 6ebfe472e09..eb98dd3e230 100644 --- a/tests/integration/test_distributed_ddl_parallel/test.py +++ 
b/tests/integration/test_distributed_ddl_parallel/test.py @@ -10,6 +10,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) + # By default the exceptions that was throwed in threads will be ignored # (they will not mark the test as failed, only printed to stderr). # diff --git a/tests/integration/test_fetch_memory_usage/test.py b/tests/integration/test_fetch_memory_usage/test.py index a4371140150..7591cc0e8a9 100644 --- a/tests/integration/test_fetch_memory_usage/test.py +++ b/tests/integration/test_fetch_memory_usage/test.py @@ -18,7 +18,6 @@ def started_cluster(): def test_huge_column(started_cluster): - if ( node.is_built_with_thread_sanitizer() or node.is_built_with_memory_sanitizer() diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py index b8bafb3d0c1..fe69d72c1c7 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py @@ -13,7 +13,6 @@ number_of_iterations = 100 def perform_request(): - buffer = BytesIO() crl = pycurl.Curl() crl.setopt(pycurl.INTERFACE, client_ip) diff --git a/tests/integration/test_jbod_balancer/test.py b/tests/integration/test_jbod_balancer/test.py index e746698611a..df34a075d5a 100644 --- a/tests/integration/test_jbod_balancer/test.py +++ b/tests/integration/test_jbod_balancer/test.py @@ -45,7 +45,6 @@ def start_cluster(): def check_balance(node, table): - partitions = node.query( """ WITH diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 6ec307f7082..0314825b6b7 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/keeper.xml"], stay_alive=True ) + # test that server is able to start @pytest.fixture(scope="module") def started_cluster(): diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..b737ac284d2 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -546,7 +546,6 @@ def test_random_requests(started_cluster): def test_end_of_session(started_cluster): - fake_zk1 = None fake_zk2 = None genuine_zk1 = None @@ -685,6 +684,7 @@ def test_concurrent_watches(started_cluster): nonlocal watches_created nonlocal all_paths_created fake_zk.ensure_path(global_path + "/" + str(i)) + # new function each time def dumb_watch(event): nonlocal dumb_watch_triggered_counter diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 70cc14fe26d..4164ffb33d3 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -163,7 +163,6 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): - try: node_zk = None node_zk2 = None diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 063421bf922..de5a9416119 100644 
--- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -114,7 +114,6 @@ def start_clickhouse(): def copy_zookeeper_data(make_zk_snapshots): - if make_zk_snapshots: # force zookeeper to create snapshot generate_zk_snapshot() else: diff --git a/tests/integration/test_merge_tree_load_parts/test.py b/tests/integration/test_merge_tree_load_parts/test.py index 777b6f14fc6..dfbe00c8e28 100644 --- a/tests/integration/test_merge_tree_load_parts/test.py +++ b/tests/integration/test_merge_tree_load_parts/test.py @@ -148,17 +148,17 @@ def test_merge_tree_load_parts_corrupted(started_cluster): node1.query("SYSTEM WAIT LOADING PARTS mt_load_parts_2") def check_parts_loading(node, partition, loaded, failed, skipped): - for (min_block, max_block) in loaded: + for min_block, max_block in loaded: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in failed: + for min_block, max_block in failed: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in skipped: + for min_block, max_block in skipped: part_name = f"{partition}_{min_block}_{max_block}" assert not node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") diff --git a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py index b6567dfebc5..4613fdb850b 100644 --- a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py +++ b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py @@ -42,7 +42,6 @@ def delete(_bucket): @route("/<_bucket>/<_path:path>", ["GET", "POST", "PUT", "DELETE"]) def server(_bucket, _path): - # It's delete query for failed part if _path.endswith("delete"): response.set_header("Location", "http://minio1:9001/" + _bucket + "/" + _path) diff --git a/tests/integration/test_merge_tree_settings_constraints/test.py b/tests/integration/test_merge_tree_settings_constraints/test.py index 0bb0179108d..be6e2a31873 100644 --- a/tests/integration/test_merge_tree_settings_constraints/test.py +++ b/tests/integration/test_merge_tree_settings_constraints/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_merge_tree_settings_constraints(): - assert "Setting storage_policy should not be changed" in instance.query_and_get_error( f"CREATE TABLE wrong_table (number Int64) engine = MergeTree() ORDER BY number SETTINGS storage_policy = 'secret_policy'" ) diff --git a/tests/integration/test_old_parts_finally_removed/test.py b/tests/integration/test_old_parts_finally_removed/test.py index 108b72c5ccd..5347d433419 100644 --- a/tests/integration/test_old_parts_finally_removed/test.py +++ b/tests/integration/test_old_parts_finally_removed/test.py @@ -63,7 +63,6 @@ def test_part_finally_removed(started_cluster): ) for i in range(60): - if ( node1.query( "SELECT count() from system.parts WHERE table = 'drop_outdated_part'" diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index ae4393fc6f6..a34141c6189 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -528,7 
+528,9 @@ def test_make_clone_in_detached(started_cluster): ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] ) assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") - assert ["broken_all_0_0_0",] == sorted( + assert [ + "broken_all_0_0_0", + ] == sorted( instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") ) diff --git a/tests/integration/test_password_constraints/test.py b/tests/integration/test_password_constraints/test.py index e3628861b28..9cdff51caa1 100644 --- a/tests/integration/test_password_constraints/test.py +++ b/tests/integration/test_password_constraints/test.py @@ -17,7 +17,6 @@ def start_cluster(): def test_complexity_rules(start_cluster): - error_message = "DB::Exception: Invalid password. The password should: be at least 12 characters long, contain at least 1 numeric character, contain at least 1 lowercase character, contain at least 1 uppercase character, contain at least 1 special character" assert error_message in node.query_and_get_error( "CREATE USER u_1 IDENTIFIED WITH plaintext_password BY ''" diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py index 914c6a99508..df084f9dbbd 100644 --- a/tests/integration/test_read_only_table/test.py +++ b/tests/integration/test_read_only_table/test.py @@ -49,7 +49,6 @@ def start_cluster(): def test_restart_zookeeper(start_cluster): - for table_id in range(NUM_TABLES): node1.query( f"INSERT INTO test_table_{table_id} VALUES (1), (2), (3), (4), (5);" diff --git a/tests/integration/test_reload_auxiliary_zookeepers/test.py b/tests/integration/test_reload_auxiliary_zookeepers/test.py index bb1455333fc..476c5dee99e 100644 --- a/tests/integration/test_reload_auxiliary_zookeepers/test.py +++ b/tests/integration/test_reload_auxiliary_zookeepers/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_reload_auxiliary_zookeepers(start_cluster): - node.query( "CREATE TABLE simple (date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', 'node') ORDER BY tuple() PARTITION BY date;" ) diff --git a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py index d6a732cc681..1d33ca02f86 100644 --- a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py +++ b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from bottle import request, route, run, response + # Handle for MultipleObjectsDelete. @route("/<_bucket>", ["POST"]) def delete(_bucket): diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 1102d190a87..1af040c3c30 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -5,6 +5,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster + # Runs simple proxy resolver in python env container. 
def run_resolver(cluster): container_id = cluster.get_container_id("resolver") diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 7c62ca0d8b6..b3570b6e281 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -87,7 +87,6 @@ config = """ def execute_query_native(node, query, user, cert_name): - config_path = f"{SCRIPT_DIR}/configs/client.xml" formatted = config.format( diff --git a/tests/integration/test_storage_kafka/kafka_pb2.py b/tests/integration/test_storage_kafka/kafka_pb2.py index 7de1363bbf1..3e47af6c1e0 100644 --- a/tests/integration/test_storage_kafka/kafka_pb2.py +++ b/tests/integration/test_storage_kafka/kafka_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.kafka_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPAIR._serialized_start = 46 _KEYVALUEPAIR._serialized_end = 88 diff --git a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py index 4d1a23c0b43..3715a9bea04 100644 --- a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.message_with_repeated_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"H\001" _MESSAGE._serialized_start = 62 diff --git a/tests/integration/test_storage_kafka/social_pb2.py b/tests/integration/test_storage_kafka/social_pb2.py index 830ade81d33..f91a7bd0539 100644 --- a/tests/integration/test_storage_kafka/social_pb2.py +++ b/tests/integration/test_storage_kafka/social_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.social_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _USER._serialized_start = 47 _USER._serialized_end = 90 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 51952ac1eb7..3a4fa6c6bfe 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -121,7 +121,7 @@ def kafka_create_topic( def kafka_delete_topic(admin_client, topic, max_retries=50): result = admin_client.delete_topics([topic]) - for (topic, e) in result.topic_error_codes: + for topic, e in result.topic_error_codes: if e == 0: logging.debug(f"Topic {topic} deleted") else: @@ -917,9 +917,7 @@ def describe_consumer_group(kafka_cluster, name): member_info["client_id"] = client_id member_info["client_host"] = client_host member_topics_assignment = [] - for (topic, partitions) in MemberAssignment.decode( - member_assignment - ).assignment: + for topic, partitions in MemberAssignment.decode(member_assignment).assignment: member_topics_assignment.append({"topic": topic, "partitions": partitions}) member_info["assignment"] = member_topics_assignment res.append(member_info) @@ -1537,7 +1535,6 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): def test_kafka_materialized_view(kafka_cluster): - instance.query( """ DROP TABLE IF EXISTS test.view; @@ -2315,7 +2312,6 @@ def test_kafka_virtual_columns2(kafka_cluster): def 
test_kafka_produce_key_timestamp(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2444,7 +2440,6 @@ def test_kafka_insert_avro(kafka_cluster): def test_kafka_produce_consume_avro(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4031,7 +4026,6 @@ def test_kafka_predefined_configuration(kafka_cluster): # https://github.com/ClickHouse/ClickHouse/issues/26643 def test_issue26643(kafka_cluster): - # for backporting: # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") admin_client = KafkaAdminClient( @@ -4313,7 +4307,6 @@ def test_row_based_formats(kafka_cluster): "RowBinaryWithNamesAndTypes", "MsgPack", ]: - print(format_name) kafka_create_topic(admin_client, format_name) @@ -4438,7 +4431,6 @@ def test_block_based_formats_2(kafka_cluster): "ORC", "JSONCompactColumns", ]: - kafka_create_topic(admin_client, format_name) instance.query( diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py index 4330ff57950..e9e5cb72363 100644 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ b/tests/integration/test_storage_nats/nats_pb2.py @@ -31,7 +31,6 @@ ProtoKeyValue = _reflection.GeneratedProtocolMessageType( _sym_db.RegisterMessage(ProtoKeyValue) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _PROTOKEYVALUE._serialized_start = 45 _PROTOKEYVALUE._serialized_end = 88 diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py index 5df8b9029e6..8666d7ae58c 100644 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -706,7 +706,6 @@ def test_abrupt_connection_loss_while_heavy_replication(started_cluster): def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled if instance.is_built_with_sanitizer(): pytest.skip("Temporary disabled (FIXME)") diff --git a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py index e017b4e66c2..a5845652eef 100644 --- a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py +++ b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.rabbitmq_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPROTO._serialized_start = 49 _KEYVALUEPROTO._serialized_end = 92 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 2e54f21787a..53b6c4109ef 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2864,7 +2864,6 @@ def test_rabbitmq_predefined_configuration(rabbitmq_cluster): def test_rabbitmq_msgpack(rabbitmq_cluster): - instance.query( """ drop table if exists rabbit_in; @@ -2908,7 +2907,6 @@ def test_rabbitmq_msgpack(rabbitmq_cluster): def test_rabbitmq_address(rabbitmq_cluster): - instance2.query( """ drop table if exists rabbit_in; @@ -3243,7 +3241,6 @@ def test_block_based_formats_2(rabbitmq_cluster): "ORC", "JSONCompactColumns", ]: - print(format_name) instance.query( diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 8b20727a7b5..4d493d9526b 
100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -18,6 +18,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py index 2f5d9349904..aa6479a2ed3 100644 --- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py +++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py @@ -11,6 +11,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 0a469bd7bbd..ff303afe19e 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -171,7 +171,6 @@ def test_mutation_simple(started_cluster, replicated): starting_block = 0 if replicated else 1 try: - for node in nodes: node.query( f"create table {name} (a Int64) engine={engine} order by tuple()" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 99978cbf6dc..89824293320 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1863,7 +1863,7 @@ def test_ttl_move_if_exists(started_cluster, name, dest_type): ) ) - for (node, policy) in zip( + for node, policy in zip( [node1, node2], ["only_jbod_1", "small_jbod_with_external"] ): node.query( diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index b71752528d3..9b9aa5e0da7 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -16,7 +16,6 @@ cluster = ClickHouseCluster(__file__) @pytest.fixture(scope="module") def started_cluster(): try: - cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py index d5e1518270e..01f2694dd0f 100755 --- a/utils/changelog-simple/format-changelog.py +++ b/utils/changelog-simple/format-changelog.py @@ -20,6 +20,7 @@ parser.add_argument( ) args = parser.parse_args() + # This function mirrors the PR description checks in ClickhousePullRequestTrigger. # Returns False if the PR should not be mentioned changelog. 
 def parse_one_pull_request(item):
diff --git a/utils/keeper-overload/keeper-overload.py b/utils/keeper-overload/keeper-overload.py
index bdb4563c713..0a059b10588 100755
--- a/utils/keeper-overload/keeper-overload.py
+++ b/utils/keeper-overload/keeper-overload.py
@@ -166,7 +166,7 @@ def main(args):
     keeper_bench_path = args.keeper_bench_path

     keepers = []
-    for (port, server_id) in zip(PORTS, SERVER_IDS):
+    for port, server_id in zip(PORTS, SERVER_IDS):
        keepers.append(
            Keeper(
                keeper_binary_path, server_id, port, workdir, args.with_thread_fuzzer

From bc107c70fa863237685c5c353937e9d4af9dd674 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Wed, 1 Mar 2023 18:50:51 +0100
Subject: [PATCH 004/277] merge and mutation make thread group for setting memory trackers right

---
 src/Common/MemoryTracker.cpp | 9 +++
 src/Common/MemoryTracker.h | 2 +
 src/Common/ThreadStatus.h | 8 +--
 src/Storages/MergeTree/MergeList.cpp | 72 +++++++------------
 src/Storages/MergeTree/MergeList.h | 27 +++----
 .../MergeTree/MergePlainMergeTreeTask.cpp | 4 +-
 src/Storages/MergeTree/MergeTreeData.cpp | 2 +-
 .../MergeTree/MutatePlainMergeTreeTask.cpp | 4 +-
 .../ReplicatedMergeMutateTaskBase.cpp | 4 +-
 9 files changed, 56 insertions(+), 76 deletions(-)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index e2129e1013e..4cfb7f764e5 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include

 namespace
@@ -118,6 +119,14 @@ MemoryTracker::~MemoryTracker()
     }
 }

+String MemoryTracker::getDebugLog() const
+{
+    return fmt::format("MemoryTracker(addr {} level {} peak {} ammount {})",
+        size_t(this),
+        magic_enum::enum_name(level),
+        ReadableSize(getPeak()),
+        ReadableSize(get()));
+}

 void MemoryTracker::logPeakMemoryUsage()
 {
diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h
index 66b56730b75..66037345eb0 100644
--- a/src/Common/MemoryTracker.h
+++ b/src/Common/MemoryTracker.h
@@ -215,6 +215,8 @@ public:

     /// Prints info about peak memory consumption into log.
     void logPeakMemoryUsage();
+
+    String getDebugLog() const;
 };

 extern MemoryTracker total_memory_tracker;
diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h
index 77c924f9650..4f7e9ca6830 100644
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@@ -40,7 +40,7 @@ class TaskStatsInfoGetter;
 class InternalTextLogsQueue;
 struct ViewRuntimeData;
 class QueryViewsLog;
-class MemoryTrackerThreadSwitcher;
+class ThreadGroupSwitcher;
 using InternalTextLogsQueuePtr = std::shared_ptr<InternalTextLogsQueue>;
 using InternalTextLogsQueueWeakPtr = std::weak_ptr<InternalTextLogsQueue>;

@@ -176,12 +176,6 @@ private:
     bool performance_counters_finalized = false;

     String query_id_from_query_context;
-    /// Requires access to query_id.
-    friend class MemoryTrackerThreadSwitcher;
-    void setQueryId(const String & query_id_)
-    {
-        query_id_from_query_context = query_id_;
-    }

     struct TimePoint
     {
diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index fa1887a02e6..4705733bb80 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -11,38 +11,24 @@
 namespace DB
 {

-MemoryTrackerThreadSwitcher::MemoryTrackerThreadSwitcher(MergeListEntry & merge_list_entry_)
+ThreadGroupSwitcher::ThreadGroupSwitcher(MergeListEntry & merge_list_entry_)
     : merge_list_entry(merge_list_entry_)
 {
-    // Each merge is executed into separate background processing pool thread
-    background_thread_memory_tracker = CurrentThread::getMemoryTracker();
-    background_thread_memory_tracker_prev_parent = background_thread_memory_tracker->getParent();
-    background_thread_memory_tracker->setParent(&merge_list_entry->memory_tracker);
+    prev_thread_group = CurrentThread::getGroup();
+    if (!prev_thread_group)
+        return;

-    prev_untracked_memory_limit = current_thread->untracked_memory_limit;
-    current_thread->untracked_memory_limit = merge_list_entry->max_untracked_memory;
-
-    /// Avoid accounting memory from another mutation/merge
-    /// (NOTE: consider moving such code to ThreadFromGlobalPool and related places)
-    prev_untracked_memory = current_thread->untracked_memory;
-    current_thread->untracked_memory = merge_list_entry->untracked_memory;
-
-    prev_query_id = std::string(current_thread->getQueryId());
-    current_thread->setQueryId(merge_list_entry->query_id);
+    CurrentThread::detachGroupIfNotDetached();
+    CurrentThread::attachToGroup(merge_list_entry_->thread_group);
 }

-MemoryTrackerThreadSwitcher::~MemoryTrackerThreadSwitcher()
+ThreadGroupSwitcher::~ThreadGroupSwitcher()
 {
-    // Unplug memory_tracker from current background processing pool thread
-    background_thread_memory_tracker->setParent(background_thread_memory_tracker_prev_parent);
+    if (!prev_thread_group)
+        return;

-    current_thread->untracked_memory_limit = prev_untracked_memory_limit;
-
-    merge_list_entry->untracked_memory = current_thread->untracked_memory;
-    current_thread->untracked_memory = prev_untracked_memory;
-
-    current_thread->setQueryId(prev_query_id);
+    CurrentThread::detachGroup();
+    CurrentThread::attachTo(prev_thread_group);
 }

 MergeListElement::MergeListElement(
@@ -55,7 +41,6 @@ MergeListElement::MergeListElement(
     , result_part_path{future_part->path}
     , result_part_info{future_part->part_info}
     , num_parts{future_part->parts.size()}
-    , max_untracked_memory(settings.max_untracked_memory)
     , query_id(table_id.getShortName() + "::" + result_part_name)
     , thread_id{getThreadId()}
     , merge_type{future_part->merge_type}
@@ -78,6 +63,12 @@ MergeListElement::MergeListElement(
         is_mutation = (result_part_info.getDataVersion() != source_data_version);
     }

+    thread_group = std::make_shared<ThreadGroupStatus>();
+
+    thread_group->master_thread_id = CurrentThread::get().thread_id;
+
+    auto & memory_tracker = thread_group->memory_tracker;

     memory_tracker.setDescription(description.c_str());
     /// MemoryTracker settings should be set here, because
     /// later (see MemoryTrackerThreadSwitcher)
@@ -97,15 +88,16 @@ MergeListElement::MergeListElement(
     ///
     /// NOTE: Remember, that Thread level MemoryTracker does not have any settings,
     /// so it's parent is required.
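    /// [Editor's sketch, not part of the patch] The intended tracker chain after this
    /// change, assuming the thread group's tracker keeps its default process-level parent:
    ///     total_memory_tracker (process)
    ///       -> thread_group->memory_tracker (one per merge/mutation, created above)
    ///       -> per-thread MemoryTracker (attached via ThreadGroupSwitcher)
    /// so allocations from every worker thread roll up into the merge's own tracker.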
-    MemoryTracker * query_memory_tracker = CurrentThread::getMemoryTracker();
-    MemoryTracker * parent_query_memory_tracker;
-    if (query_memory_tracker->level == VariableContext::Thread &&
-        (parent_query_memory_tracker = query_memory_tracker->getParent()) &&
-        parent_query_memory_tracker != &total_memory_tracker)
-    {
-        memory_tracker.setOrRaiseHardLimit(parent_query_memory_tracker->getHardLimit());
-    }
+    MemoryTracker * cur_memory_tracker = CurrentThread::getMemoryTracker();
+    if (cur_memory_tracker->level == VariableContext::Thread)
+    {
+        MemoryTracker * query_memory_tracker = cur_memory_tracker->getParent();
+        if (query_memory_tracker != &total_memory_tracker)
+        {
+            memory_tracker.setOrRaiseHardLimit(query_memory_tracker->getHardLimit());
+        }
+    }
 }

 MergeInfo MergeListElement::getInfo() const
@@ -128,7 +120,7 @@ MergeInfo MergeListElement::getInfo() const
     res.rows_read = rows_read.load(std::memory_order_relaxed);
     res.rows_written = rows_written.load(std::memory_order_relaxed);
     res.columns_written = columns_written.load(std::memory_order_relaxed);
-    res.memory_usage = memory_tracker.get();
+    res.memory_usage = getMemoryTracker().get();
     res.thread_id = thread_id;
     res.merge_type = toString(merge_type);
     res.merge_algorithm = toString(merge_algorithm.load(std::memory_order_relaxed));
@@ -142,14 +134,4 @@ MergeInfo MergeListElement::getInfo() const
     return res;
 }

-MergeListElement::~MergeListElement()
-{
-    if (untracked_memory != 0)
-    {
-        CurrentThread::getMemoryTracker()->adjustWithUntrackedMemory(untracked_memory);
-        untracked_memory = 0;
-    }
-}
-
-
 }
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 17a56272a57..b557f745f0d 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -63,23 +64,19 @@ struct Settings;

 /**
  * Since merge is executed with multiple threads, this class
- * switches the parent MemoryTracker to account all the memory used.
+ * switches the parent MemoryTracker as part of the thread group to account all the memory used.
  */
-class MemoryTrackerThreadSwitcher : boost::noncopyable
+class ThreadGroupSwitcher : boost::noncopyable
 {
 public:
-    explicit MemoryTrackerThreadSwitcher(MergeListEntry & merge_list_entry_);
-    ~MemoryTrackerThreadSwitcher();
+    explicit ThreadGroupSwitcher(MergeListEntry & merge_list_entry_);
+    ~ThreadGroupSwitcher();
 private:
     MergeListEntry & merge_list_entry;
-    MemoryTracker * background_thread_memory_tracker;
-    MemoryTracker * background_thread_memory_tracker_prev_parent = nullptr;
-    Int64 prev_untracked_memory_limit;
-    Int64 prev_untracked_memory;
-    String prev_query_id;
+    ThreadGroupStatusPtr prev_thread_group;
 };

-using MemoryTrackerThreadSwitcherPtr = std::unique_ptr<MemoryTrackerThreadSwitcher>;
+using ThreadGroupSwitcherPtr = std::unique_ptr<ThreadGroupSwitcher>;

 struct MergeListElement : boost::noncopyable
 {
@@ -113,10 +110,6 @@ struct MergeListElement : boost::noncopyable
     /// Updated only for Vertical algorithm
     std::atomic<UInt64> columns_written{};

-    /// Used to adjust ThreadStatus::untracked_memory_limit
-    UInt64 max_untracked_memory;
-    /// Used to avoid losing any allocation context
-    UInt64 untracked_memory = 0;

     /// Used for identifying mutations/merges in trace_log
     std::string query_id;
@@ -128,7 +121,7 @@ struct MergeListElement : boost::noncopyable
     /// Description used for logging
     /// Needs to outlive memory_tracker since it's used in its destructor
     const String description{"Mutate/Merge"};
-    MemoryTracker memory_tracker{VariableContext::Process};
+    ThreadGroupStatusPtr thread_group;

     MergeListElement(
         const StorageID & table_id_,
@@ -137,9 +130,9 @@ struct MergeListElement : boost::noncopyable

     MergeInfo getInfo() const;

-    MergeListElement * ptr() { return this; }
+    const MemoryTracker & getMemoryTracker() const { return thread_group->memory_tracker; }

-    ~MergeListElement();
+    MergeListElement * ptr() { return this; }

     MergeListElement & ref() { return *this; }
 };
diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
index 1ccdefd2b6a..c2b3f9dfc8d 100644
--- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
@@ -32,9 +32,9 @@ bool MergePlainMergeTreeTask::executeStep()
     ProfileEventsScope profile_events_scope(&profile_counters);

     /// Make out memory tracker a parent of current thread memory tracker
-    MemoryTrackerThreadSwitcherPtr switcher;
+    ThreadGroupSwitcherPtr switcher;
     if (merge_list_entry)
-        switcher = std::make_unique<MemoryTrackerThreadSwitcher>(*merge_list_entry);
+        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_list_entry);

     switch (state)
     {
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 84c301e5986..2a80dc7fb5e 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -7415,7 +7415,7 @@ try
         part_log_elem.rows = (*merge_entry)->rows_written;
         part_log_elem.bytes_uncompressed = (*merge_entry)->bytes_written_uncompressed;
-        part_log_elem.peak_memory_usage = (*merge_entry)->memory_tracker.getPeak();
+        part_log_elem.peak_memory_usage = (*merge_entry)->getMemoryTracker().getPeak();
     }

     if (profile_counters)
diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
index 9bd0f148d6c..a05a12eabe4 100644
--- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
@@ -68,9 +68,9 @@ bool MutatePlainMergeTreeTask::executeStep()
     ProfileEventsScope profile_events_scope(&profile_counters);

     /// Make out memory tracker a parent of current thread memory tracker
-    MemoryTrackerThreadSwitcherPtr switcher;
+    ThreadGroupSwitcherPtr switcher;
     if (merge_list_entry)
-        switcher = std::make_unique<MemoryTrackerThreadSwitcher>(*merge_list_entry);
+        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_list_entry);

     switch (state)
     {
diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
index 9ce7eb42666..0af96cec323 100644
--- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
@@ -128,9 +128,9 @@ bool ReplicatedMergeMutateTaskBase::executeStep()

 bool ReplicatedMergeMutateTaskBase::executeImpl()
 {
-    MemoryTrackerThreadSwitcherPtr switcher;
+    ThreadGroupSwitcherPtr switcher;
     if (merge_mutate_entry)
-        switcher = std::make_unique<MemoryTrackerThreadSwitcher>(*merge_mutate_entry);
+        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_mutate_entry);

     auto remove_processed_entry = [&] () -> bool
     {

From da4f2bd9232e5fa19a9d6b5e8119b5a9627af301 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Fri, 3 Mar 2023 15:21:53 +0100
Subject: [PATCH 005/277] do not attach empty thread group

---
 src/Common/MemoryTracker.cpp | 2 +-
 src/Storages/MergeTree/MergeList.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp
index 4cfb7f764e5..16c0d1e9eb1 100644
--- a/src/Common/MemoryTracker.cpp
+++ b/src/Common/MemoryTracker.cpp
@@ -121,7 +121,7 @@ MemoryTracker::~MemoryTracker()

 String MemoryTracker::getDebugLog() const
 {
-    return fmt::format("MemoryTracker(addr {} level {} peak {} ammount {})",
+    return fmt::format("MemoryTracker(addr {} level {} peak {} amount {})",
         size_t(this),
         magic_enum::enum_name(level),
         ReadableSize(getPeak()),
diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index 4705733bb80..a9c55495b4b 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -28,7 +28,7 @@ ThreadGroupSwitcher::~ThreadGroupSwitcher()
         return;

     CurrentThread::detachGroup();
-    CurrentThread::attachTo(prev_thread_group);
+    CurrentThread::attachToGroup(prev_thread_group);
 }

From 6a6d45e6e76669677378c94dc3997a29dccec912 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Fri, 3 Mar 2023 17:21:45 +0100
Subject: [PATCH 006/277] set up performance_counters for thread group

---
 src/Storages/MergeTree/MergeList.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index a9c55495b4b..65725c1a632 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -65,6 +65,11 @@ MergeListElement::MergeListElement(

     thread_group = std::make_shared<ThreadGroupStatus>();

+    auto p_counters = CurrentThread::get().current_performance_counters;
+    while (p_counters && p_counters->level != VariableContext::Process)
+        p_counters = p_counters->getParent();
+    thread_group->performance_counters.setParent(p_counters);
+
     thread_group->master_thread_id = CurrentThread::get().thread_id;

     auto & memory_tracker = thread_group->memory_tracker;

From 0fcf7c0363ba5279878fe1c9d3f536a2b28cfe72 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Fri, 3 Mar 2023 23:09:36 +0100
Subject: [PATCH 007/277] std::optional instead shared_ptr

---
 src/Storages/MergeTree/MergeList.cpp | 31 +++++++++++++++++--
 src/Storages/MergeTree/MergeList.h | 13 +++++---
 .../MergeTree/MergePlainMergeTreeTask.cpp | 4 +--
 .../MergeTree/MutatePlainMergeTreeTask.cpp | 4 +--
 .../ReplicatedMergeMutateTaskBase.cpp | 4 +--
 5 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index 65725c1a632..f53c6ad81ee 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -11,14 +11,14 @@
 namespace DB
 {

-ThreadGroupSwitcher::ThreadGroupSwitcher(MergeListEntry & merge_list_entry_)
+ThreadGroupSwitcher::ThreadGroupSwitcher(MergeListEntry * merge_list_entry_)
     : merge_list_entry(merge_list_entry_)
 {
     prev_thread_group = CurrentThread::getGroup();
     if (!prev_thread_group)
         return;

-    CurrentThread::detachGroupIfNotDetached();
+    CurrentThread::detachGroup();
     CurrentThread::attachToGroup(merge_list_entry_->thread_group);
 }

@@ -27,10 +27,37 @@ ThreadGroupSwitcher::~ThreadGroupSwitcher()
     if (!prev_thread_group)
         return;

+    if (!merge_list_entry)
+        return;
+
     CurrentThread::detachGroup();
     CurrentThread::attachToGroup(prev_thread_group);
 }

+ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupSwitcher && other)
+{
+    this->swap(other);
+}
+
+ThreadGroupSwitcher& ThreadGroupSwitcher::operator=(ThreadGroupSwitcher && other)
+{
+    if (this != &other)
+    {
+        auto tmp = ThreadGroupSwitcher();
+        tmp.swap(other);
+        this->swap(tmp);
+    }
+    return *this;
+}
+
+void ThreadGroupSwitcher::swap(ThreadGroupSwitcher & other)
+{
+    std::swap(merge_list_entry, other.merge_list_entry);
+    std::swap(prev_thread_group, other.prev_thread_group);
+    std::swap(prev_query_id, other.prev_query_id);
+}
+
+
 MergeListElement::MergeListElement(
     const StorageID & table_id_,
     FutureMergedMutatedPartPtr future_part,
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index b557f745f0d..200a574698c 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -66,18 +66,21 @@ struct Settings;

 /**
  * Since merge is executed with multiple threads, this class
  * switches the parent MemoryTracker as part of the thread group to account all the memory used.
 */
-class ThreadGroupSwitcher : boost::noncopyable
+class ThreadGroupSwitcher : private boost::noncopyable
 {
 public:
-    explicit ThreadGroupSwitcher(MergeListEntry & merge_list_entry_);
+    explicit ThreadGroupSwitcher(MergeListEntry * merge_list_entry_);
+    ThreadGroupSwitcher(ThreadGroupSwitcher && other);
+    ThreadGroupSwitcher& operator=(ThreadGroupSwitcher && other);
     ~ThreadGroupSwitcher();
 private:
-    MergeListEntry & merge_list_entry;
+    ThreadGroupSwitcher() = default;
+    void swap(ThreadGroupSwitcher & other);
+
+    MergeListEntry * merge_list_entry = nullptr;
     ThreadGroupStatusPtr prev_thread_group;
 };
 
-using ThreadGroupSwitcherPtr = std::unique_ptr<ThreadGroupSwitcher>;
-
 struct MergeListElement : boost::noncopyable
 {
     const StorageID table_id;
diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
index c2b3f9dfc8d..052c6467b37 100644
--- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
@@ -32,9 +32,9 @@ bool MergePlainMergeTreeTask::executeStep()
     ProfileEventsScope profile_events_scope(&profile_counters);
 
     /// Make out memory tracker a parent of current thread memory tracker
-    ThreadGroupSwitcherPtr switcher;
+    std::optional<ThreadGroupSwitcher> switcher;
     if (merge_list_entry)
-        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_list_entry);
+        switcher = ThreadGroupSwitcher(merge_list_entry.get());
 
     switch (state)
     {
diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
index a05a12eabe4..673bfaa0d47 100644
--- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
@@ -68,9 +68,9 @@ bool MutatePlainMergeTreeTask::executeStep()
     ProfileEventsScope profile_events_scope(&profile_counters);
 
     /// Make out memory tracker a parent of current thread memory tracker
-    ThreadGroupSwitcherPtr switcher;
+    std::optional<ThreadGroupSwitcher> switcher;
     if (merge_list_entry)
-        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_list_entry);
+        switcher = ThreadGroupSwitcher(merge_list_entry.get());
 
     switch (state)
     {
diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
index 0af96cec323..fbc6522170e 100644
--- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
@@ -128,9 +128,9 @@ bool ReplicatedMergeMutateTaskBase::executeStep()
 
 bool ReplicatedMergeMutateTaskBase::executeImpl()
 {
-    ThreadGroupSwitcherPtr switcher;
+    std::optional<ThreadGroupSwitcher> switcher;
     if (merge_mutate_entry)
-        switcher = std::make_unique<ThreadGroupSwitcher>(*merge_mutate_entry);
+        switcher = ThreadGroupSwitcher(merge_mutate_entry.get());
 
     auto remove_processed_entry = [&] () -> bool
     {
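Replacing the heap-allocated ThreadGroupSwitcherPtr with a value stored in std::optional is what forces the move constructor and move assignment above into existence: assigning a temporary into the optional requires the payload type to be movable. A sketch of the call-site pattern this enables (assuming the movable switcher from this patch):

    std::optional<ThreadGroupSwitcher> switcher;
    if (merge_list_entry)
        switcher = ThreadGroupSwitcher(merge_list_entry.get());  /// move-assigns into the optional
    /// ... run the task step under the merge's thread group ...
    /// leaving the scope runs ~ThreadGroupSwitcher() and restores the previous group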
From da3e744405bc9707ac8b453f0637303a191d4847 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Mon, 6 Mar 2023 13:53:19 +0100
Subject: [PATCH 008/277] set context from the master thread

---
 src/Storages/MergeTree/MergeList.cpp | 11 +++++++----
 src/Storages/MergeTree/MergeList.h   |  8 ++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index f53c6ad81ee..3a2e5a4ff35 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -34,12 +34,12 @@ ThreadGroupSwitcher::~ThreadGroupSwitcher()
     CurrentThread::attachToGroup(prev_thread_group);
 }
 
-ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupSwitcher && other)
+ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupSwitcher && other) noexcept
 {
     this->swap(other);
 }
 
-ThreadGroupSwitcher& ThreadGroupSwitcher::operator=(ThreadGroupSwitcher && other)
+ThreadGroupSwitcher& ThreadGroupSwitcher::operator=(ThreadGroupSwitcher && other) noexcept
 {
     if (this != &other)
     {
@@ -50,7 +50,7 @@ ThreadGroupSwitcher& ThreadGroupSwitcher::operator=(ThreadGroupSwitcher && other
     return *this;
 }
 
-void ThreadGroupSwitcher::swap(ThreadGroupSwitcher & other)
+void ThreadGroupSwitcher::swap(ThreadGroupSwitcher & other) noexcept
 {
     std::swap(merge_list_entry, other.merge_list_entry);
     std::swap(prev_thread_group, other.prev_thread_group);
@@ -92,7 +92,10 @@ MergeListElement::MergeListElement(
 
     thread_group = std::make_shared<ThreadGroupStatus>();
 
-    auto p_counters = CurrentThread::get().current_performance_counters;
+    thread_group->query_context = CurrentThread::get().getQueryContext();
+    thread_group->global_context = CurrentThread::get().getGlobalContext();
+
+    auto * p_counters = CurrentThread::get().current_performance_counters;
     while (p_counters && p_counters->level != VariableContext::Process)
         p_counters = p_counters->getParent();
     thread_group->performance_counters.setParent(p_counters);
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 200a574698c..2a166470203 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -70,12 +70,12 @@ class ThreadGroupSwitcher : private boost::noncopyable
 {
 public:
     explicit ThreadGroupSwitcher(MergeListEntry * merge_list_entry_);
-    ThreadGroupSwitcher(ThreadGroupSwitcher && other);
-    ThreadGroupSwitcher& operator=(ThreadGroupSwitcher && other);
+    ThreadGroupSwitcher(ThreadGroupSwitcher && other) noexcept;
+    ThreadGroupSwitcher& operator=(ThreadGroupSwitcher && other) noexcept;
     ~ThreadGroupSwitcher();
 private:
-    ThreadGroupSwitcher() = default;
-    void swap(ThreadGroupSwitcher & other);
+    ThreadGroupSwitcher() noexcept = default;
+    void swap(ThreadGroupSwitcher & other) noexcept;
 
     MergeListEntry * merge_list_entry = nullptr;
     ThreadGroupStatusPtr prev_thread_group;
 };
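Propagating the query and global context into the thread group means that worker threads which later attach to the group can recover the settings of the query that scheduled the merge. A sketch of the consumer side (the weak-pointer lock is an assumption about ThreadGroupStatus::query_context; illustrative only, not part of the patch):

    CurrentThread::attachToGroup(thread_group);
    if (auto query_context = CurrentThread::getGroup()->query_context.lock())
    {
        const auto & settings = query_context->getSettingsRef();  /// settings of the originating query
    }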
From aeb8766ad59ec2b813a811142cb6e9cd0aa57572 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Sat, 18 Mar 2023 21:14:32 +0100
Subject: [PATCH 009/277] adjust after rebase

---
 src/Storages/MergeTree/MergeList.cpp          | 45 +++----------
 src/Storages/MergeTree/MergeList.h            | 22 ++++---
 .../MergeTree/MergePlainMergeTreeTask.cpp     |  2 +-
 .../MergeTree/MutatePlainMergeTreeTask.cpp    |  2 +-
 .../ReplicatedMergeMutateTaskBase.cpp         |  2 +-
 5 files changed, 25 insertions(+), 48 deletions(-)

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index 3a2e5a4ff35..c13c5e6105e 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -11,15 +11,16 @@
 namespace DB
 {
 
-ThreadGroupSwitcher::ThreadGroupSwitcher(MergeListEntry * merge_list_entry_)
-    : merge_list_entry(merge_list_entry_)
+ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupStatusPtr thread_group)
 {
+    chassert(thread_group);
+
     prev_thread_group = CurrentThread::getGroup();
     if (!prev_thread_group)
         return;
 
-    CurrentThread::detachGroup();
-    CurrentThread::attachToGroup(merge_list_entry_->thread_group);
+    CurrentThread::detachFromGroupIfNotDetached();
+    CurrentThread::attachToGroup(thread_group);
 }
 
 ThreadGroupSwitcher::~ThreadGroupSwitcher()
@@ -27,37 +28,10 @@ ThreadGroupSwitcher::~ThreadGroupSwitcher()
     if (!prev_thread_group)
         return;
 
-    if (!merge_list_entry)
-        return;
-
-    CurrentThread::detachGroup();
+    CurrentThread::detachFromGroupIfNotDetached();
     CurrentThread::attachToGroup(prev_thread_group);
 }
 
-ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupSwitcher && other) noexcept
-{
-    this->swap(other);
-}
-
-ThreadGroupSwitcher& ThreadGroupSwitcher::operator=(ThreadGroupSwitcher && other) noexcept
-{
-    if (this != &other)
-    {
-        auto tmp = ThreadGroupSwitcher();
-        tmp.swap(other);
-        this->swap(tmp);
-    }
-    return *this;
-}
-
-void ThreadGroupSwitcher::swap(ThreadGroupSwitcher & other) noexcept
-{
-    std::swap(merge_list_entry, other.merge_list_entry);
-    std::swap(prev_thread_group, other.prev_thread_group);
-    std::swap(prev_query_id, other.prev_query_id);
-}
-
-
 MergeListElement::MergeListElement(
     const StorageID & table_id_,
     FutureMergedMutatedPartPtr future_part,
@@ -90,18 +64,13 @@ MergeListElement::MergeListElement(
         is_mutation = (result_part_info.getDataVersion() != source_data_version);
     }
 
-    thread_group = std::make_shared<ThreadGroupStatus>();
-
-    thread_group->query_context = CurrentThread::get().getQueryContext();
-    thread_group->global_context = CurrentThread::get().getGlobalContext();
+    thread_group = ThreadGroupStatus::createForQuery(CurrentThread::get().getQueryContext(), {});
 
     auto * p_counters = CurrentThread::get().current_performance_counters;
     while (p_counters && p_counters->level != VariableContext::Process)
         p_counters = p_counters->getParent();
     thread_group->performance_counters.setParent(p_counters);
 
-    thread_group->master_thread_id = CurrentThread::get().thread_id;
-
     auto & memory_tracker = thread_group->memory_tracker;
     memory_tracker.setDescription(description.c_str());
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 2a166470203..37587d9d517 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -69,15 +69,23 @@ struct Settings;
 class ThreadGroupSwitcher : private boost::noncopyable
 {
 public:
-    explicit ThreadGroupSwitcher(MergeListEntry * merge_list_entry_);
-    ThreadGroupSwitcher(ThreadGroupSwitcher && other) noexcept;
-    ThreadGroupSwitcher& operator=(ThreadGroupSwitcher && other) noexcept;
-    ~ThreadGroupSwitcher();
-private:
     ThreadGroupSwitcher() noexcept = default;
-    void swap(ThreadGroupSwitcher & other) noexcept;
+    explicit ThreadGroupSwitcher(ThreadGroupStatusPtr thread_group);
+    ThreadGroupSwitcher(ThreadGroupSwitcher && other) noexcept
+        : prev_thread_group(std::move(other.prev_thread_group))
+    {
+        other.prev_thread_group = nullptr;
+    }
+    ThreadGroupSwitcher & operator=(ThreadGroupSwitcher && other) noexcept
+    {
+        chassert(this != &other);
+        prev_thread_group = std::move(other.prev_thread_group);
+        other.prev_thread_group = nullptr;
+        return *this;
+    }
+    ~ThreadGroupSwitcher();
 
-    MergeListEntry * merge_list_entry = nullptr;
+private:
     ThreadGroupStatusPtr prev_thread_group;
 };
 
diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
index 052c6467b37..709a681619a 100644
--- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
@@ -34,7 +34,7 @@ bool MergePlainMergeTreeTask::executeStep()
     /// Make out memory tracker a parent of current thread memory tracker
     std::optional<ThreadGroupSwitcher> switcher;
     if (merge_list_entry)
-        switcher = ThreadGroupSwitcher(merge_list_entry.get());
+        switcher = ThreadGroupSwitcher((*merge_list_entry)->thread_group);
 
     switch (state)
     {
diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
index 673bfaa0d47..822098e4352 100644
--- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
@@ -70,7 +70,7 @@ bool MutatePlainMergeTreeTask::executeStep()
     /// Make out memory tracker a parent of current thread memory tracker
     std::optional<ThreadGroupSwitcher> switcher;
     if (merge_list_entry)
-        switcher = ThreadGroupSwitcher(merge_list_entry.get());
+        switcher = ThreadGroupSwitcher((*merge_list_entry)->thread_group);
 
     switch (state)
     {
diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
index fbc6522170e..9368f7d8c51 100644
--- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp
@@ -130,7 +130,7 @@ bool ReplicatedMergeMutateTaskBase::executeImpl()
 {
     std::optional<ThreadGroupSwitcher> switcher;
     if (merge_mutate_entry)
-        switcher = ThreadGroupSwitcher(merge_mutate_entry.get());
+        switcher = ThreadGroupSwitcher((*merge_mutate_entry)->thread_group);
 
     auto remove_processed_entry = [&] () -> bool
     {
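After the rebase the switcher no longer knows about MergeListEntry at all: it is constructed from a plain ThreadGroupStatusPtr, which makes it reusable for any background work. A sketch of the final RAII pattern (emplace() avoids the move entirely; the work function is hypothetical):

    std::optional<ThreadGroupSwitcher> switcher;
    if (merge_list_entry)
        switcher.emplace((*merge_list_entry)->thread_group);
    executeWorkStep();  /// hypothetical work, accounted to the merge's memory tracker
    /// the previous thread group is restored when `switcher` is destroyed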
a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp

From a4e2d09001b0f4cdd1f187bb3f3d95d2d10d2d79 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Fri, 24 Mar 2023 21:58:23 +0000
Subject: [PATCH 010/277] without schema inference from actual data

---
 src/Analyzer/Passes/QueryAnalysisPass.cpp | 84 +++++++++++++++++------
 src/Interpreters/Context.cpp              | 66 +++++++++++++-----
 2 files changed, 110 insertions(+), 40 deletions(-)

diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 2c4f7c3dc3b..87891cf148e 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -112,6 +112,8 @@ namespace ErrorCodes
     extern const int ALIAS_REQUIRED;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
     extern const int UNKNOWN_TABLE;
+    extern const int ILLEGAL_COLUMN;
+    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
 }
 
 /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h before.
@@ -6087,9 +6089,24 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
     if (!nested_table_function)
         expressions_visitor.visit(table_function_node_typed.getArgumentsNode());
 
+    const auto & table_function_name = table_function_node_typed.getTableFunctionName();
+
     auto & scope_context = scope.context;
 
-    TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_function_node_typed.toAST(), scope_context);
+    TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().tryGet(table_function_name, scope_context);
+    if (!table_function_ptr)
+    {
+        auto hints = TableFunctionFactory::instance().getHints(table_function_name);
+        if (!hints.empty())
+            throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
+                "Unknown table function {}. Maybe you meant: {}",
+                table_function_name,
+                DB::toString(hints));
+        else
+            throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
+                "Unknown table function {}",
+                table_function_name);
+    }
 
     if (!nested_table_function &&
         scope_context->getSettingsRef().use_structure_from_insertion_table_in_table_functions &&
@@ -6099,40 +6116,65 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
         const auto & insertion_table = scope_context->getInsertionTable();
         if (!insertion_table.empty())
         {
-            auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
             const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
-            auto table_structure = table_function_ptr->getActualTableStructure(scope_context);
+            DB::ColumnsDescription structure_hint;
 
             /// Insert table matches columns against SELECT expression by position, so we want to map
             /// insert table columns to table function columns through names from SELECT expression.
 
             auto insert_column = insert_structure.begin();
-            for (const auto & expression : expression_list)
+            auto insert_structure_end = insert_structure.end();
+            auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
+            bool asterisk = false;
+            const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
+            auto expression = expression_list.begin();
+
+            for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
             {
-                if (auto * identifier_node = expression->as<IdentifierNode>())
+                if (auto * identifier_node = (*expression)->as<IdentifierNode>())
                 {
-                    if (table_structure.hasPhysical(identifier_node->getIdentifier().getFullName()))
-                        table_structure.modify(identifier_node->getIdentifier().getFullName(), [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
-                    ++insert_column;
-                }
-                else if (auto * matcher_node = expression->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
-                {
-                    for (const auto & column : table_structure)
+                    if (!virtual_column_names.contains(identifier_node->getIdentifier().getFullName()))
                     {
-                        table_structure.modify(column.name, [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
-                        ++insert_column;
-                        if (insert_column == insert_structure.end())
-                            break;
+                        if (asterisk)
+                            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                        structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
                     }
+
+                    if (asterisk)
+                        --insert_structure_end;
+                    else
+                        ++insert_column;
+                }
+                else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
+                {
+                    if (asterisk)
+                        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");
+                    if (!structure_hint.empty())
+                        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                    asterisk = true;
                 }
                 else
-                    ++insert_column;
-
-                if (insert_column == insert_structure.end())
-                    break;
+                {
+                    if (asterisk)
+                        --insert_structure_end;
+                    else
+                        ++insert_column;
+                }
             }
 
-            table_function_ptr->setStructureHint(table_structure);
+            if (expression != expression_list.end())
+                throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
+
+            if (asterisk)
+            {
+                for (; insert_column != insert_structure_end; ++insert_column)
+                    structure_hint.add({ insert_column->name, insert_column->type });
+            }
+
+            if (!structure_hint.empty())
+                table_function_ptr->setStructureHint(structure_hint);
        }
    }
 
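The same mapping is implemented next for the pre-analyzer path in Context.cpp. What the positional logic above does is easiest to see on a concrete example (table and file names are illustrative, not taken from the patch):

    -- given: CREATE TABLE dst (a UInt32, b String) ENGINE = Memory;
    INSERT INTO dst SELECT x, y FROM file('data.jsonl');
    -- x and y are matched to dst.a and dst.b by position, so the table function
    -- receives the structure hint `x UInt32, y String` instead of inferring the
    -- types from the data itself.
    INSERT INTO dst SELECT * FROM file('data.jsonl');
    -- a lone asterisk maps all remaining insert columns through unchanged.
    -- Mixing an asterisk with named columns (or using two asterisks) throws
    -- ILLEGAL_COLUMN; a SELECT list longer than the insert table throws
    -- NUMBER_OF_COLUMNS_DOESNT_MATCH.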
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 395c735a70a..3aa0f339cf2 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -165,6 +165,8 @@ namespace ErrorCodes
     extern const int UNKNOWN_READ_METHOD;
     extern const int NOT_IMPLEMENTED;
     extern const int UNKNOWN_FUNCTION;
+    extern const int ILLEGAL_COLUMN;
+    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
 }
 
 
@@ -1395,38 +1397,64 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
     if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
     {
         const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
-        auto table_structure = table_function_ptr->getActualTableStructure(getQueryContext());
+        DB::ColumnsDescription structure_hint;
 
         /// Insert table matches columns against SELECT expression by position, so we want to map
         /// insert table columns to table function columns through names from SELECT expression.
 
         auto insert_column = insert_structure.begin();
-        for (const auto & expression : select_query_hint->select()->as<ASTExpressionList>()->children)
+        auto insert_structure_end = insert_structure.end();
+        auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
+        bool asterisk = false;
+        const auto & expression_list = select_query_hint->select()->as<ASTExpressionList>()->children;
+        auto expression = expression_list.begin();
+
+        for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
         {
-            if (auto * identifier = expression->as<ASTIdentifier>())
+            if (auto * identifier = (*expression)->as<ASTIdentifier>())
             {
-                if (table_structure.hasPhysical(identifier->name()))
-                    table_structure.modify(identifier->name(), [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
-                ++insert_column;
-            }
-            else if (expression->as<ASTAsterisk>())
-            {
-                for (const auto & column : table_structure)
+                if (!virtual_column_names.contains(identifier->name()))
                 {
-                    table_structure.modify(column.name, [&insert_column](ColumnDescription & column){ column.type = insert_column->type; });
-                    ++insert_column;
-                    if (insert_column == insert_structure.end())
-                        break;
+                    if (asterisk)
+                        throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                    structure_hint.add({ identifier->name(), insert_column->type });
                 }
+
+                if (asterisk)
+                    --insert_structure_end;
+                else
+                    ++insert_column;
+            }
+            else if ((*expression)->as<ASTAsterisk>())
+            {
+                if (asterisk)
+                    throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only one asterisk can be used in INSERT SELECT query.");
+                if (!structure_hint.empty())
+                    throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Asterisk cannot be mixed with column list in INSERT SELECT query.");
+
+                asterisk = true;
             }
             else
-                ++insert_column;
-
-            if (insert_column == insert_structure.end())
-                break;
+            {
+                if (asterisk)
+                    --insert_structure_end;
+                else
+                    ++insert_column;
+            }
         }
 
-        table_function_ptr->setStructureHint(table_structure);
+        if (expression != expression_list.end())
+            throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns in insert table less than required by SELECT expression.");
+
+        if (asterisk)
+        {
+            for (; insert_column != insert_structure_end; 
++insert_column) + structure_hint.add({ insert_column->name, insert_column->type }); + } + + if (!structure_hint.empty()) + table_function_ptr->setStructureHint(structure_hint); } res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); From 07502600f7518d81f94b89095b70c2297f48a357 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 25 Mar 2023 05:15:10 +0000 Subject: [PATCH 011/277] clang wants it :\ --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 3aa0f339cf2..c787d387632 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1407,7 +1407,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint(); bool asterisk = false; const auto & expression_list = select_query_hint->select()->as()->children; - auto expression = expression_list.begin(); + const auto * expression = expression_list.begin(); for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression) { From adede9dcb0f9446adf9375ceca870ce024112bc4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 25 Mar 2023 20:55:10 +0000 Subject: [PATCH 012/277] test fixed --- ...e_structure_from_insertion_table.reference | 6 ++++++ ...458_use_structure_from_insertion_table.sql | 20 +++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference index 0ca28640270..7a004c58187 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference @@ -1,9 +1,15 @@ +\N 0 \N 1 1 2 +1 2 \N 42 \N 42 \N 42 \N 42 +\N 42 +\N 42 +42 +42 \N \N \N diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index a609dc361fe..ac53e003521 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -9,17 +9,17 @@ set use_structure_from_insertion_table_in_table_functions=2; insert into test select * from file(02458_data.jsonl); insert into test select x, 1 from file(02458_data.jsonl); insert into test select x, y from file(02458_data.jsonl); -insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError UNKNOWN_IDENTIFIER} +insert into test select x, z from file(02458_data.jsonl); insert into test select * from file(02458_data.jsoncompacteachrow); -insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError 
ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} +insert into test select x, y from file(02458_data.jsoncompacteachrow); +insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError UNKNOWN_IDENTIFIER} +insert into test select x, z from file(02458_data.jsoncompacteachrow); insert into test select * from input() format CSV 1,2 -insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select x, y from input() format CSV 1,2 insert into test select x, y from input() format JSONEachRow {"x" : null, "y" : 42} select * from test order by y; @@ -28,10 +28,10 @@ drop table test; create table test (x Nullable(UInt32)) engine=Memory(); insert into test select * from file(02458_data.jsonl); insert into test select x from file(02458_data.jsonl); -insert into test select y from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select y as x from file(02458_data.jsonl); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select y from file(02458_data.jsonl); +insert into test select y as x from file(02458_data.jsonl); -insert into test select c1 from input() format CSV 1,2; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} +insert into test select c1 from input() format CSV 1,2; -- {clientError INCORRECT_DATA} insert into test select x from input() format JSONEachRow {"x" : null, "y" : 42} select * from test order by x; From 22da93e239ffd4402ba27aee4c982742082cc9fc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 Mar 2023 21:41:01 +0000 Subject: [PATCH 013/277] Cosmetics --- src/Functions/formatDateTime.cpp | 99 +++++++++++++++----------------- src/Functions/parseDateTime.cpp | 26 ++++----- 2 files changed, 58 insertions(+), 67 deletions(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index bbb4c3ba5b0..daea8b3a7b0 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -39,21 +39,17 @@ namespace ErrorCodes namespace { -struct FormatDateTimeTraits +enum class SupportInteger { - enum class SupportInteger - { - Yes, - No - }; - - enum class FormatSyntax - { - MySQL, - Joda - }; + Yes, + No }; +enum class FormatSyntax +{ + MySQL, + Joda +}; template struct InstructionValueTypeMap {}; template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; @@ -85,11 +81,9 @@ constexpr std::string_view weekdaysFull[] = {"Sunday", "Monday", "Tuesday", "Wed constexpr std::string_view weekdaysShort[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; -constexpr std::string_view monthsFull[] - = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; +constexpr std::string_view monthsFull[] = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; -constexpr std::string_view monthsShort[] - = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; +constexpr std::string_view monthsShort[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; /** formatDateTime(time, 'format') * Performs formatting of time, according to provided format. @@ -129,7 +123,7 @@ constexpr std::string_view monthsShort[] * * PS. We can make this function to return FixedString. 
Currently it returns String. */ -template +template class FunctionFormatDateTimeImpl : public IFunction { private: @@ -157,7 +151,7 @@ private: /// This is the reason why we use raw function pointer in MySQL format and std::function /// in Joda format. using Func = std::conditional_t< - format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL, + format_syntax == FormatSyntax::MySQL, size_t (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &), std::function>; @@ -257,7 +251,10 @@ private: return pos; } public: - static size_t mysqlNoop(char *, Time, UInt64, UInt32, const DateLUTImpl &) { return 0; } + static size_t mysqlNoop(char *, Time, UInt64, UInt32, const DateLUTImpl &) + { + return 0; + } static size_t mysqlCentury(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { @@ -430,8 +427,7 @@ private: return writeNumber2(dest, ToSecondImpl::execute(source, timezone)); } - static size_t - mysqlFractionalSecond(char * dest, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) + static size_t mysqlFractionalSecond(char * dest, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) { if (scale == 0) scale = 1; @@ -672,7 +668,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if constexpr (support_integer == FormatDateTimeTraits::SupportInteger::Yes) + if constexpr (support_integer == SupportInteger::Yes) { if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, @@ -718,7 +714,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, [[maybe_unused]] size_t input_rows_count) const override { ColumnPtr res; - if constexpr (support_integer == FormatDateTimeTraits::SupportInteger::Yes) + if constexpr (support_integer == SupportInteger::Yes) { if (arguments.size() == 1) { @@ -793,7 +789,7 @@ public: using T = typename InstructionValueTypeMap::InstructionValueType; std::vector> instructions; String out_template; - auto result_size = parseFormat(format, instructions, scale, out_template); + size_t out_template_size = parseFormat(format, instructions, scale, out_template); const DateLUTImpl * time_zone_tmp = nullptr; if (castType(arguments[0].type.get(), [&]([[maybe_unused]] const auto & type) { return true; })) @@ -807,26 +803,26 @@ public: const auto & vec = times->getData(); auto col_res = ColumnString::create(); - auto & dst_data = col_res->getChars(); - auto & dst_offsets = col_res->getOffsets(); - dst_data.resize(vec.size() * (result_size + 1)); - dst_offsets.resize(vec.size()); + auto & res_data = col_res->getChars(); + auto & res_offsets = col_res->getOffsets(); + res_data.resize(vec.size() * (out_template_size + 1)); + res_offsets.resize(vec.size()); - if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL) + if constexpr (format_syntax == FormatSyntax::MySQL) { - /// Fill result with literals. + /// Fill result with template. { - UInt8 * begin = dst_data.data(); - UInt8 * end = begin + dst_data.size(); - UInt8 * pos = begin; + const UInt8 * const begin = res_data.data(); + const UInt8 * const end = res_data.data() + res_data.size(); + UInt8 * pos = res_data.data(); if (pos < end) { - memcpy(pos, out_template.data(), result_size + 1); /// With zero terminator. - pos += result_size + 1; + memcpy(pos, out_template.data(), out_template_size + 1); /// With zero terminator. 
mystring[mystring.size()] = '\0' is guaranteed since C++11. + pos += out_template_size + 1; } - /// Fill by copying exponential growing ranges. + /// Copy exponentially growing ranges. while (pos < end) { size_t bytes_to_copy = std::min(pos - begin, end - pos); @@ -836,7 +832,7 @@ public: } } - auto * begin = reinterpret_cast(dst_data.data()); + auto * begin = reinterpret_cast(res_data.data()); auto * pos = begin; for (size_t i = 0; i < vec.size(); ++i) { @@ -844,9 +840,7 @@ public: { const auto c = DecimalUtils::split(vec[i], scale); for (auto & instruction : instructions) - { instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); - } } else { @@ -855,21 +849,19 @@ public: } *pos++ = '\0'; - dst_offsets[i] = pos - begin; + res_offsets[i] = pos - begin; } - dst_data.resize(pos - begin); + res_data.resize(pos - begin); return col_res; } template size_t parseFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const { - static_assert( - format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL || format_syntax == FormatDateTimeTraits::FormatSyntax::Joda, - "format syntax must be one of MySQL or Joda"); + static_assert(format_syntax == FormatSyntax::MySQL || format_syntax == FormatSyntax::Joda); - if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL) + if constexpr (format_syntax == FormatSyntax::MySQL) return parseMySQLFormat(format, instructions, scale, out_template); else return parseJodaFormat(format, instructions, scale, out_template); @@ -914,13 +906,13 @@ public: switch (*pos) { - // Abbreviated weekday [Mon...Sun] + // Abbreviated weekday [Mon-Sun] case 'a': instructions.emplace_back(&Instruction::mysqlDayOfWeekTextShort); out_template += "Mon"; break; - // Abbreviated month [Jan...Dec] + // Abbreviated month [Jan-Dec] case 'b': instructions.emplace_back(&Instruction::mysqlMonthOfYearTextShort); out_template += "Jan"; @@ -958,12 +950,10 @@ public: // Fractional seconds case 'f': - { /// If the time data type has no fractional part, then we print '0' as the fractional part. 
instructions.emplace_back(&Instruction::mysqlFractionalSecond); out_template += String(std::max(1, scale), '0'); break; - } // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 case 'F': @@ -1013,7 +1003,7 @@ public: out_template += "0"; break; - // Full weekday [Monday...Sunday] + // Full weekday [Monday-Sunday] case 'W': instructions.emplace_back(&Instruction::mysqlDayOfWeekTextLong); out_template += "Monday"; @@ -1186,6 +1176,7 @@ public: size_t reserve_size = 0; const char * pos = format.data(); const char * end = format.data() + format.size(); + while (pos < end) { const char * cur_token = pos; @@ -1392,10 +1383,10 @@ struct NameFromUnixTimeInJodaSyntax }; -using FunctionFormatDateTime = FunctionFormatDateTimeImpl; -using FunctionFromUnixTimestamp = FunctionFormatDateTimeImpl; -using FunctionFormatDateTimeInJodaSyntax = FunctionFormatDateTimeImpl; -using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl; +using FunctionFormatDateTime = FunctionFormatDateTimeImpl; +using FunctionFromUnixTimestamp = FunctionFormatDateTimeImpl; +using FunctionFormatDateTimeInJodaSyntax = FunctionFormatDateTimeImpl; +using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl; } diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index abee7e0d8f8..cd3c0d993d0 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -101,16 +101,16 @@ namespace bool is_year_of_era = false; /// If true, year is calculated from era and year of era, the latter cannot be zero or negative. bool has_year = false; /// Whether year was explicitly specified. - /// If is_clock_hour = true, is_hour_of_half_day = true, hour's range is [1, 12] - /// If is_clock_hour = true, is_hour_of_half_day = false, hour's range is [1, 24] - /// If is_clock_hour = false, is_hour_of_half_day = true, hour's range is [0, 11] - /// If is_clock_hour = false, is_hour_of_half_day = false, hour's range is [0, 23] + /// If hour_starts_at_1 = true, is_hour_of_half_day = true, hour's range is [1, 12] + /// If hour_starts_at_1 = true, is_hour_of_half_day = false, hour's range is [1, 24] + /// If hour_starts_at_1 = false, is_hour_of_half_day = true, hour's range is [0, 11] + /// If hour_starts_at_1 = false, is_hour_of_half_day = false, hour's range is [0, 23] Int32 hour = 0; Int32 minute = 0; /// range [0, 59] Int32 second = 0; /// range [0, 59] bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime - bool is_clock_hour = false; /// Whether the hour is clockhour + bool hour_starts_at_1 = false; /// Whether the hour is clockhour bool is_hour_of_half_day = false; /// Whether the hour is of half day bool has_time_zone_offset = false; /// If true, time zone offset is explicitly specified. 
@@ -137,7 +137,7 @@ namespace second = 0; is_am = true; - is_clock_hour = false; + hour_starts_at_1 = false; is_hour_of_half_day = false; has_time_zone_offset = false; @@ -275,23 +275,23 @@ namespace throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Unknown half day of day: {}", text); } - void setHour(Int32 hour_, bool is_hour_of_half_day_ = false, bool is_clock_hour_ = false) + void setHour(Int32 hour_, bool is_hour_of_half_day_ = false, bool hour_starts_at_1_ = false) { Int32 max_hour; Int32 min_hour; Int32 new_hour = hour_; - if (!is_hour_of_half_day_ && !is_clock_hour_) + if (!is_hour_of_half_day_ && !hour_starts_at_1_) { max_hour = 23; min_hour = 0; } - else if (!is_hour_of_half_day_ && is_clock_hour_) + else if (!is_hour_of_half_day_ && hour_starts_at_1_) { max_hour = 24; min_hour = 1; new_hour = hour_ % 24; } - else if (is_hour_of_half_day_ && !is_clock_hour_) + else if (is_hour_of_half_day_ && !hour_starts_at_1_) { max_hour = 11; min_hour = 0; @@ -306,16 +306,16 @@ namespace if (hour_ < min_hour || hour_ > max_hour) throw Exception( ErrorCodes::CANNOT_PARSE_DATETIME, - "Value {} for hour must be in the range [{}, {}] if_hour_of_half_day={} and is_clock_hour={}", + "Value {} for hour must be in the range [{}, {}] if_hour_of_half_day={} and hour_starts_at_1={}", hour, max_hour, min_hour, is_hour_of_half_day_, - is_clock_hour_); + hour_starts_at_1_); hour = new_hour; is_hour_of_half_day = is_hour_of_half_day_; - is_clock_hour = is_clock_hour_; + hour_starts_at_1 = hour_starts_at_1_; } void setMinute(Int32 minute_) From 3db38dbb5a1a227c749549faf4ec0f140bc267b7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 Mar 2023 22:16:10 +0000 Subject: [PATCH 014/277] Replace mySQL date formatter M behavior from minutes to month name --- .../functions/date-time-functions.md | 8 +- src/Functions/formatDateTime.cpp | 576 +++++++++++++----- src/Functions/parseDateTime.cpp | 40 +- .../00718_format_datetime.reference | 4 +- .../0_stateless/00718_format_datetime.sql | 3 + ...00921_datetime64_compatibility_long.python | 2 +- ...21_datetime64_compatibility_long.reference | 2 +- .../0_stateless/01411_from_unixtime.reference | 2 +- .../0_stateless/02564_date_format.reference | 2 +- .../02668_parse_datetime.reference | 7 + .../0_stateless/02668_parse_datetime.sql | 6 +- 11 files changed, 493 insertions(+), 159 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index d06ab253cf7..425d67ed5a0 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string. 
“ | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | | %m | month as an integer number (01-12) | 01 | -| %M | minute (00-59) | 33 | +| %M | full month name (January-December) | January | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM | -| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 | +| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | | %t | horizontal-tab character (’) | | -| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 | +| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 | | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 | | %V | ISO 8601 week number (01-53) | 01 | | %w | weekday as a integer number with Sunday as 0 (0-6) | 2 | diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index daea8b3a7b0..c243222db91 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -109,13 +109,13 @@ constexpr std::string_view monthsShort[] = {"Jan", "Feb", "Mar", "Apr", "May", " * * Performance on Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz: * - * WITH formatDateTime(now() + number, '%H:%M:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x); + * WITH formatDateTime(now() + number, '%H:%i:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x); * - 97 million rows per second per core; * * WITH formatDateTime(toDateTime('2018-01-01 00:00:00') + number, '%F %T') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x) * - 71 million rows per second per core; * - * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%M:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); + * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%i:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); * - 53 million rows per second per core; * * select count() from (select formatDateTime(t, 'Hello %Y World') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); @@ -146,26 +146,34 @@ private: class Instruction { public: - /// Using std::function will cause performance degradation in MySQL format by 0.45x. - /// But std::function is required for Joda format to capture extra variables. - /// This is the reason why we use raw function pointer in MySQL format and std::function - /// in Joda format. - using Func = std::conditional_t< - format_syntax == FormatSyntax::MySQL, - size_t (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &), - std::function>; + /// Joda format generally requires capturing extra variables (i.e. holding state) which is more convenient with + /// std::function and std::bind. Unfortunately, std::function causes a performance degradation by 0.45x compared to raw function + /// pointers. For MySQL format, we generally prefer raw function pointers. Because of the special case that not all formatters are + /// fixed-width formatters (see mysqlLiteral), we still need to be able to store state. For that reason, we use member function + /// pointers instead of static function pointers. 
+ using FuncMysql = size_t (Instruction diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index c61cacc9d8c..cf71b423713 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -270,3 +270,29 @@ def test_throttle_retry(cluster): ) == "42\n" ) + + +# Check that loading of parts is retried. +def test_retry_loading_parts(cluster): + node = cluster.instances["node"] + + node.query( + """ + CREATE TABLE s3_retry_loading_parts ( + id Int64 + ) ENGINE=MergeTree() + ORDER BY id + SETTINGS storage_policy='s3_no_retries' + """ + ) + + node.query("INSERT INTO s3_retry_loading_parts VALUES (42)") + node.query("DETACH TABLE s3_retry_loading_parts") + + fail_request(cluster, 5) + node.query("ATTACH TABLE s3_retry_loading_parts") + + assert node.contains_in_log( + "Failed to load data part all_1_1_0 at try 0 with retryable error" + ) + assert node.query("SELECT * FROM s3_retry_loading_parts") == "42\n" From 224f4f92e0e39da5f181111aa329048662f319f7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 5 Apr 2023 21:26:53 +0000 Subject: [PATCH 155/277] fix test --- .../02458_use_structure_from_insertion_table.reference | 3 --- .../02458_use_structure_from_insertion_table.sql | 10 +++++----- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference index 7a004c58187..53abb49c4e1 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.reference @@ -1,9 +1,6 @@ \N 0 \N 1 1 2 -1 2 -\N 42 -\N 42 \N 42 \N 42 \N 42 diff --git a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql index a199a69cde8..97d493fa031 100644 --- a/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql +++ b/tests/queries/0_stateless/02458_use_structure_from_insertion_table.sql @@ -13,13 +13,13 @@ insert into test select x + 1, y from file(02458_data.jsonl); -- {serverError ON insert into test select x, z from file(02458_data.jsonl); insert into test select * from file(02458_data.jsoncompacteachrow); -insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} -insert into test select x, y from file(02458_data.jsoncompacteachrow); +insert into test select x, 1 from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} +insert into test select x, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} insert into test select x + 1, y from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} -insert into test select x, z from file(02458_data.jsoncompacteachrow); +insert into test select x, z from file(02458_data.jsoncompacteachrow); -- {serverError ONLY_NULLS_WHILE_READING_SCHEMA} insert into test select * from input() format CSV 1,2 -insert into test select x, y from input() format CSV 1,2 +insert into test select x, y from input() format CSV 1,2 -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE} insert into test select x, y from input() format JSONEachRow {"x" : null, "y" : 42} select * from test order by y; @@ -31,7 +31,7 @@ insert into test select x from 
file(02458_data.jsonl);
 insert into test select y from file(02458_data.jsonl);
 insert into test select y as x from file(02458_data.jsonl);
 
-insert into test select c1 from input() format CSV 1,2; -- {clientError INCORRECT_DATA}
+insert into test select c1 from input() format CSV 1,2; -- {serverError CANNOT_EXTRACT_TABLE_STRUCTURE}
 insert into test select x from input() format JSONEachRow {"x" : null, "y" : 42}
 
 select * from test order by x;

From 785ea8213b95d5874512db302515baf6532b25ab Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 6 Apr 2023 02:29:37 +0200
Subject: [PATCH 156/277] Better exception messages from Keeper client

---
 src/Common/ZooKeeper/ZooKeeperImpl.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
index 8183569a718..79a975e683f 100644
--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@@ -669,8 +669,8 @@ void ZooKeeper::receiveThread()
                     earliest_operation = operations.begin()->second;
                     auto earliest_operation_deadline = earliest_operation->time + std::chrono::microseconds(args.operation_timeout_ms * 1000);
                     if (now > earliest_operation_deadline)
-                        throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (deadline already expired) for path: {}",
-                            earliest_operation->request->getPath());
+                        throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (deadline of {} ms already expired) for path: {}",
+                            args.operation_timeout_ms, earliest_operation->request->getPath());
                     max_wait_us = std::chrono::duration_cast<std::chrono::microseconds>(earliest_operation_deadline - now).count();
                 }
             }
@@ -687,12 +687,12 @@ void ZooKeeper::receiveThread()
             {
                 if (earliest_operation)
                 {
-                    throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response) for request {} for path: {}",
-                        toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath());
+                    throw Exception(Error::ZOPERATIONTIMEOUT, "Operation timeout (no response in {} ms) for request {} for path: {}",
+                        args.operation_timeout_ms, toString(earliest_operation->request->getOpNum()), earliest_operation->request->getPath());
                 }
                 waited_us += max_wait_us;
                 if (waited_us >= args.session_timeout_ms * 1000)
-                    throw Exception(Error::ZOPERATIONTIMEOUT, "Nothing is received in session timeout");
+                    throw Exception(Error::ZOPERATIONTIMEOUT, "Nothing is received in session timeout of {} ms", args.session_timeout_ms);
 
             }
 
@@ -1080,7 +1080,7 @@ void ZooKeeper::pushRequest(RequestInfo && info)
             if (requests_queue.isFinished())
                 throw Exception(Error::ZSESSIONEXPIRED, "Session expired");
 
            throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push request to queue within operation timeout of {} ms", args.operation_timeout_ms);
        }
    }
    catch (...)
@@ -1332,7 +1332,7 @@ void ZooKeeper::close()
     request_info.request = std::make_shared<ZooKeeperCloseRequest>(std::move(request));
 
     if (!requests_queue.tryPush(std::move(request_info), args.operation_timeout_ms))
-        throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push close request to queue within operation timeout");
+        throw Exception(Error::ZOPERATIONTIMEOUT, "Cannot push close request to queue within operation timeout of {} ms", args.operation_timeout_ms);
 
     ProfileEvents::increment(ProfileEvents::ZooKeeperClose);
 }
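With the configured limits interpolated into the text, a Keeper timeout in the server log can be diagnosed without cross-checking the client configuration. After this change a timeout surfaces roughly as follows (request type, path and values are illustrative):

    Operation timeout (no response in 10000 ms) for request List for path: /clickhouse/tables/01/replicas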
From 22af013184d70cb87d6c193476ad30aab6559f7d Mon Sep 17 00:00:00 2001
From: Justin de Guzman
Date: Wed, 5 Apr 2023 20:49:24 -0700
Subject: [PATCH 157/277] Add why clickhouse-local overview

---
 docs/en/operations/utilities/clickhouse-local.md | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index a23e0745dec..6363d9cab27 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -6,7 +6,13 @@ sidebar_label: clickhouse-local
 
 # clickhouse-local
 
-The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
+## When to use clickhouse-local vs. ClickHouse
+
+`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without a complex installation process.
+
+While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`.
+
+Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a Parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
 
 ## Download clickhouse-local
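The use cases the new overview links to boil down to one-liners; a minimal illustration of the local-CSV case (file name and query are invented for the example, not taken from the docs diff):

    clickhouse-local --query "SELECT count(*) FROM file('reviews.csv', CSVWithNames)"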
From 5eb31bba8743b33050a5711f7e3b1182e1bb5948 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 6 Apr 2023 07:47:07 +0000
Subject: [PATCH 158/277] MySQL compat: Align behavior of %f in
 formatDateTime() with parseDateTime()

---
 .../functions/date-time-functions.md          |  4 ++-
 src/Core/Settings.h                           |  3 +-
 src/Core/SettingsChangesHistory.h             |  1 +
 src/Functions/formatDateTime.cpp              | 36 ++++++++++++++++---
 .../00718_format_datetime.reference           | 25 +++++++++----
 .../0_stateless/00718_format_datetime.sql     | 19 ++++++++--
 6 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 11036d804dc..b49f8745468 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1264,7 +1264,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
 | %d | day of the month, zero-padded (01-31) | 02 |
 | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
 | %e | day of the month, space-padded (1-31) |   2 |
-| %f | fractional second from the fractional part of DateTime64 | 1234560 |
+| %f | fractional second, see below (*) | 1234560 |
 | %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
 | %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
 | %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
@@ -1295,6 +1295,8 @@ Using replacement fields, you can define a pattern for the resulting string. “
 | %z | Time offset from UTC as +HHMM or -HHMM | -0500 |
 | %% | a % sign | % |
 
+(*) The behavior of `%f` is to print `000000` (six zeros) if the formatted value is a Date, Date32 or DateTime (which have no fractional seconds) or a DateTime64 with a precision of 0. Earlier versions of ClickHouse printed `0` in this case. The previous behavior can be restored using setting `formatdatetime_f_prints_single_zero = 1`.
+
 **Example**
 
 Query:
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index b6a149aa4ca..1b565d421eb 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -467,7 +467,8 @@ class IColumn;
     M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
     \
     M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
-    M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in function 'formatDateTime' produces the month name instead of minutes.", 0) \
+    M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
+    M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' produces the month name instead of minutes.", 0) \
     \
     M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. 
This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \ M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4f89397ed9d..d7f80cc7a49 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -101,6 +101,7 @@ static std::map sett {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"23.4", {{"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}}}, {"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index d6275a54c75..a015340fc5d 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -449,6 +449,20 @@ private: } size_t mysqlFractionalSecond(char * dest, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) + { + if (scale == 0) + scale = 6; + + for (Int64 i = scale, value = fractional_second; i > 0; --i) + { + dest[i - 1] += value % 10; + value /= 10; + } + return scale; + } + + /// Same as mysqlFractionalSecond but prints a single zero if the value has no fractional seconds + size_t mysqlFractionalSecondSingleZero(char * dest, Time /*source*/, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & /*timezone*/) { if (scale == 0) scale = 1; @@ -710,6 +724,7 @@ private: } const bool mysql_M_is_month_name; + const bool mysql_f_prints_single_zero; public: static constexpr auto name = Name::name; @@ -718,6 +733,7 @@ public: explicit FunctionFormatDateTimeImpl(ContextPtr context) : mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name) + , mysql_f_prints_single_zero(context->getSettings().formatdatetime_f_prints_single_zero) { } @@ -1116,11 +1132,21 @@ public: // Fractional seconds case 'f': { - /// If the time data type has no fractional part, then we print '0' as the fractional part. - Instruction instruction; - instruction.setMysqlFunc(&Instruction::mysqlFractionalSecond); - instructions.push_back(std::move(instruction)); - out_template += String(std::max(1, scale), '0'); + /// If the time data type has no fractional part, we print (default) '000000' or (deprecated) '0' as fractional part. + if (mysql_f_prints_single_zero) + { + Instruction instruction; + instruction.setMysqlFunc(&Instruction::mysqlFractionalSecondSingleZero); + instructions.push_back(std::move(instruction)); + out_template += String(scale == 0 ? 1 : scale, '0'); + } + else + { + Instruction instruction; + instruction.setMysqlFunc(&Instruction::mysqlFractionalSecond); + instructions.push_back(std::move(instruction)); + out_template += String(scale == 0 ? 
6 : scale, '0'); + } break; } diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index eb2c23576eb..50874ac9b2e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -5,6 +5,7 @@ Jan Jan 02 02 01/02/18 01/02/18 2 2 +000000 000000 2018-01-02 2018-01-02 10 12 22 00 @@ -43,11 +44,23 @@ no formatting pattern no formatting pattern -1100 +0300 +0530 -1234560 -000340 +000000 +000000 +000000 +000000 +123 +123456 +123456789 +0 +0 +0 +0 +123 +123456 +123456789 2022-12-08 18:11:29.123400000 2022-12-08 18:11:29.1 -2022-12-08 18:11:29.0 -2022-12-08 18:11:29.0 -2022-12-08 00:00:00.0 -2022-12-08 00:00:00.0 +2022-12-08 18:11:29.000000 +2022-12-08 18:11:29.000000 +2022-12-08 00:00:00.000000 +2022-12-08 00:00:00.000000 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index a77578fc7c3..c0db6a4f64e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -17,6 +17,7 @@ SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%C'), formatDateTime(t SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%d'), formatDateTime(toDate32('2018-01-02'), '%d'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%D'), formatDateTime(toDate32('2018-01-02'), '%D'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%e'), formatDateTime(toDate32('2018-01-02'), '%e'); +SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%f'), formatDateTime(toDate32('2018-01-02'), '%f'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%F'), formatDateTime(toDate32('2018-01-02'), '%F'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%h'), formatDateTime(toDate32('2018-01-02'), '%h'); SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%H'), formatDateTime(toDate32('2018-01-02'), '%H'); @@ -66,8 +67,22 @@ SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'US/Samoa'), '%z'); SELECT formatDateTime(toDateTime('2020-01-01 01:00:00', 'Europe/Moscow'), '%z'); SELECT formatDateTime(toDateTime('1970-01-01 00:00:00', 'Asia/Kolkata'), '%z'); -select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f'); -select formatDateTime(toDateTime64('2022-12-08 18:11:29.00034', 6, 'UTC'), '%f'); +-- %f (default settings) +select formatDateTime(toDate('2010-01-04'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDate32('2010-01-04'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDateTime('2010-01-04 12:34:56'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDateTime64('2010-01-04 12:34:56', 0), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123', 3), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 6), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SETTINGS formatdatetime_f_prints_single_zero = 0; +-- %f (legacy settings) +select formatDateTime(toDate('2010-01-04'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDate32('2010-01-04'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDateTime('2010-01-04 
12:34:56'), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDateTime64('2010-01-04 12:34:56', 0), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123', 3), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 6), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; +select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SETTINGS formatdatetime_f_prints_single_zero = 1; select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); From b7be5fd89e825ae10be38e265a7289839d97b244 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Apr 2023 08:40:39 +0000 Subject: [PATCH 159/277] Improve code and tests --- src/Functions/parseDateTime.cpp | 48 ++++++++----------- .../02668_parse_datetime.reference | 6 +-- .../0_stateless/02668_parse_datetime.sql | 6 +-- 3 files changed, 27 insertions(+), 33 deletions(-) diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 01239074852..12e2e113af4 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -716,7 +716,7 @@ namespace if constexpr (need_check_space == NeedCheckSpace::Yes) checkSpace(cur, end, 1, "assertChar requires size >= 1", fragment); - if (*cur != expected) + if (*cur != expected) [[unlikely]] throw Exception( ErrorCodes::CANNOT_PARSE_DATETIME, "Unable to parse fragment {} from {} because char {} is expected but {} provided", @@ -729,6 +729,24 @@ namespace return cur; } + template <NeedCheckSpace need_check_space> + static Pos assertNumber(Pos cur, Pos end, const String & fragment) + { + if constexpr (need_check_space == NeedCheckSpace::Yes) + checkSpace(cur, end, 1, "assertNumber requires size >= 1", fragment); + + if (*cur < '0' || *cur > '9') [[unlikely]] + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because {} is not a number", + fragment, + std::string_view(cur, end - cur), + String(*cur, 1)); + + ++cur; + return cur; + } + static Pos mysqlDayOfWeekTextShort(Pos cur, Pos end, const String & fragment, DateTime & date) { checkSpace(cur, end, 3, "mysqlDayOfWeekTextShort requires size >= 3", fragment); @@ -1037,32 +1055,8 @@ namespace static Pos mysqlMicrosecond(Pos cur, Pos end, const String & fragment, DateTime & /*date*/) { - checkSpace(cur, end, 6, "mysqlMicrosecond requires size >= 6", fragment); - - Pos start = cur; - auto check_is_number = [&](Pos pos) - { - if (*pos < '0' || *pos > '9') - throw Exception( - ErrorCodes::CANNOT_PARSE_DATETIME, - "Unable to parse fragment '{}' from '{}' because '{}'' is not a number ", - fragment, - std::string_view(start, end), - *cur); - }; - - check_is_number(cur); - ++cur; - check_is_number(cur); - ++cur; - check_is_number(cur); - ++cur; - check_is_number(cur); - ++cur; - check_is_number(cur); - ++cur; - check_is_number(cur); - ++cur; + for (size_t i = 0; i < 6; ++i) + cur = assertNumber<NeedCheckSpace::Yes>(cur, end, fragment); return cur; } diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference index a5b5ad7d109..3a6925ecb70 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.reference +++ b/tests/queries/0_stateless/02668_parse_datetime.reference @@ -198,11 +198,11 @@ select parseDateTime('456789', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', select 
parseDateTime('42', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError NOT_ENOUGH_SPACE } select parseDateTime('12ABCD', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } -- mixed YMD format -select parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +select parseDateTime('2021-01-04+23:00:00.654321', '%Y-%m-%d+%H:%i:%s.%f', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); 1 -select parseDateTime('2019-07-03 11:04:10', '%Y-%m-%d %H:%i:%s', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('2019-07-03 11:04:10.975319', '%Y-%m-%d %H:%i:%s.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); 1 -select parseDateTime('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('10:04:11 03-07-2019.242424', '%s:%i:%H %d-%m-%Y.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); 1 -- *OrZero, *OrNull, str_to_date select parseDateTimeOrZero('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql index 33e84120521..b18375840c9 100644 --- a/tests/queries/0_stateless/02668_parse_datetime.sql +++ b/tests/queries/0_stateless/02668_parse_datetime.sql @@ -135,9 +135,9 @@ select parseDateTime('42', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC select parseDateTime('12ABCD', '%f', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } -- mixed YMD format -select parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); -select parseDateTime('2019-07-03 11:04:10', '%Y-%m-%d %H:%i:%s', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); -select parseDateTime('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('2021-01-04+23:00:00.654321', '%Y-%m-%d+%H:%i:%s.%f', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +select parseDateTime('2019-07-03 11:04:10.975319', '%Y-%m-%d %H:%i:%s.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('10:04:11 03-07-2019.242424', '%s:%i:%H %d-%m-%Y.%f', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); -- *OrZero, *OrNull, str_to_date select parseDateTimeOrZero('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); From 4b7c0f42f4f2777c6da90602ac28deb2963bf5ee Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Apr 2023 09:49:58 +0000 Subject: [PATCH 160/277] Small documentation follow-up to #47246 --- docs/en/sql-reference/functions/date-time-functions.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 11036d804dc..74ef9a28dc1 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1276,7 +1276,7 @@ Using replacement fields, you can define a pattern for the resulting string. 
“ | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | | %m | month as an integer number (01-12) | 01 | -| %M | full month name (January-December) | January | +| %M | full month name (January-December), see (*) below | January | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | @@ -1295,6 +1295,8 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %z | Time offset from UTC as +HHMM or -HHMM | -0500 | | %% | a % sign | % | +(*) In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`. + **Example** Query: From f9fa29342159e032b8054c8759d529a78671727c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Apr 2023 13:49:03 +0300 Subject: [PATCH 161/277] Update 00002_log_and_exception_messages_formatting.sql --- .../00002_log_and_exception_messages_formatting.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index e1409985e41..0638c50ec69 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -53,10 +53,10 @@ create temporary table known_short_messages (s String) as select * from (select ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. -select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_string), 0) from logs where length(message_format_string) < 10 and message_format_string not in known_short_messages; +select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_string), 1) from logs where length(message_format_string) < 10 and message_format_string not in known_short_messages; -- Same as above. Feel free to update the threshold or remove this query if really necessary -select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 2) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; +select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Same as above, but exceptions must be more informative. 
Feel free to update the threshold or remove this query if really necessary select 'exceptions shorter than 30', max2(countDistinctOrDefault(message_format_string), 27) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%' and message_format_string not in known_short_messages; From 2cc7d52bdfae122a733bfee6f8f0e7cf05f67fa1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Apr 2023 13:49:44 +0300 Subject: [PATCH 162/277] Update 00002_log_and_exception_messages_formatting.reference --- .../00002_log_and_exception_messages_formatting.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index d3991f053f0..1e7b85d6489 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -1,7 +1,7 @@ runtime messages 0.001 runtime exceptions 0.05 -messages shorter than 10 0 -messages shorter than 16 2 +messages shorter than 10 1 +messages shorter than 16 3 exceptions shorter than 30 27 noisy messages 0.3 noisy Trace messages 0.16 From 3fc952a56af8efcccc670460f9fb1462b77f752a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 6 Apr 2023 13:01:24 +0200 Subject: [PATCH 163/277] Update CachedOnDiskReadBufferFromFile.cpp --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 72346787cfb..5eaee2e3026 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1184,7 +1184,7 @@ String CachedOnDiskReadBufferFromFile::getInfoForLog() implementation_buffer_read_range_str = "None"; String current_file_segment_info; - if (current_file_segment_it == file_segments_holder->file_segments.end()) + if (current_file_segment_it != file_segments_holder->file_segments.end()) current_file_segment_info = (*current_file_segment_it)->getInfoForLog(); else current_file_segment_info = "None"; From 1ec9f5e42c03d2a054f8aeecf8b8464de657f46b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 6 Apr 2023 13:20:11 +0200 Subject: [PATCH 164/277] Avoid operation on uninitialised data in readDateTimeTextImpl --- src/IO/ReadHelpers.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 20ba73e0fa7..9c0c9525773 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1028,12 +1028,15 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re bool is_ok = true; if constexpr (std::is_same_v) - datetime64 = DecimalUtils::decimalFromComponents(components, scale); + { + datetime64 = DecimalUtils::decimalFromComponents(components, scale) * negative_multiplier; + } else + { is_ok = DecimalUtils::tryGetDecimalFromComponents(components, scale, datetime64); - - datetime64 *= negative_multiplier; - + if (is_ok) + datetime64 *= negative_multiplier; + } return ReturnType(is_ok); } From 56c51043785c4c201816ec5b2b8a1b7f40ae70e4 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 6 Apr 2023 11:43:56 +0000 Subject: [PATCH 165/277] Update tests --- 
tests/queries/0_stateless/01905_to_json_string.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01905_to_json_string.reference b/tests/queries/0_stateless/01905_to_json_string.reference index 33d435f8e1a..ec4f4e48bde 100644 --- a/tests/queries/0_stateless/01905_to_json_string.reference +++ b/tests/queries/0_stateless/01905_to_json_string.reference @@ -1,3 +1,3 @@ -[] 2947817982 "&" -69802.9769 "w" -1.9158530982937093e25 ["2003-05-15","1988-03-19 06:13:49","2090-04-14 03:58:26.029","91943d2e-480d-66b5-ee4c-1b5bb8eb7256"] "½O" [] -[-115] 481807067 ",{MM" -170235.0663 "o" 3.3808659558052087e155 ["2055-01-12","2070-08-09 03:49:21","2068-11-30 09:36:49.672","20b0e7b5-ad0e-177b-3054-c779b2a8ebe0"] "I\\u001C" ["e57178f9-4d10-2fa1-7c2d-53c5a65c3463"] +[] 2947817982 "&" -69802.9769 "o" 3.3808659558052087e155 ["2142-01-24","2076-06-05 14:54:21","2068-11-30 09:36:49.672","ee4c1b5b-b8eb-7256-20b0-e7b5ad0e177b"] "´á" ["7c2d53c5-a65c-3463-a76e-e26583aca234"] +[-115] 481807067 ",{MM" -45534.1174 "w" 1.711178201812925e-166 ["1994-01-04","1971-12-29 08:41:23","2012-03-25 07:11:39.573","3054c779-b2a8-ebe0-ec50-64cb1c494fbd"] "Ó\\u0000" ["055300b2-b400-653c-1ea0-2413e3a3af76"] {"1234":"5678"} From b199219ec1b4f34bfa0b98983e3efb1103785b35 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 6 Apr 2023 12:24:22 +0000 Subject: [PATCH 166/277] Add reading step for system zookeeper. Analyze path from filter DAG. --- .../System/StorageSystemZooKeeper.cpp | 247 ++++++++++-------- src/Storages/System/StorageSystemZooKeeper.h | 17 +- 2 files changed, 154 insertions(+), 110 deletions(-) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 9663c76a5c3..ee33253e40f 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -12,11 +12,17 @@ #include #include #include +#include +#include +#include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -155,8 +161,24 @@ public: } }; +class ReadFromSystemZooKeeper final : public SourceStepWithFilter +{ +public: + ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info_, ContextPtr context_); + + String getName() const override { return "ReadFromSystemZooKeeper"; } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override; + +private: + void fillData(MutableColumns & res_columns) const; + + std::shared_ptr storage_limits; + ContextPtr context; +}; + StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) - : IStorageSystemOneBlock(table_id_) + : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; ColumnsDescription desc; @@ -173,6 +195,26 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } +bool StorageSystemZooKeeper::mayBenefitFromIndexForIn(const ASTPtr & node, ContextPtr, const StorageMetadataPtr &) const +{ + return node->as() && node->getColumnName() == "path"; +} + +void StorageSystemZooKeeper::read( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) +{ + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + auto read_step = 
std::make_unique<ReadFromSystemZooKeeper>(header, query_info, context); + query_plan.addStep(std::move(read_step)); +} + SinkToStoragePtr StorageSystemZooKeeper::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr context) { if (!context->getConfigRef().getBool("allow_zookeeper_write", false)) @@ -229,125 +271,99 @@ static String pathCorrected(const String & path) return path_corrected; } - -static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context, bool allow_unrestricted) +static bool isPathNode(const ActionsDAG::Node * node) { - const auto * function = elem.as<ASTFunction>(); - if (!function) - return false; + while (node->type == ActionsDAG::ActionType::ALIAS) + node = node->children.at(0); - if (function->name == "and") + return node->result_name == "path"; +} + +static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextPtr context, bool allow_unrestricted) +{ + if (node.type != ActionsDAG::ActionType::FUNCTION) + return; + + auto function_name = node.function_base->getName(); + if (function_name == "and") { - for (const auto & child : function->arguments->children) - if (extractPathImpl(*child, res, context, allow_unrestricted)) - return true; + for (const auto * child : node.children) + extractPathImpl(*child, res, context, allow_unrestricted); - return false; + return; } - const auto & args = function->arguments->as<ASTExpressionList &>(); - if (args.children.size() != 2) - return false; + if (node.children.size() != 2) + return; - if (function->name == "in") + if (function_name == "in") { - const ASTIdentifier * ident = args.children.at(0)->as<ASTIdentifier>(); - if (!ident || ident->name() != "path") - return false; + if (!isPathNode(node.children.at(0))) + return; - ASTPtr value = args.children.at(1); + auto value = node.children.at(1)->column; + if (!value) + return; - if (value->as<ASTSubquery>()) - { - auto interpreter_subquery = interpretSubquery(value, context, {}, {}); - auto pipeline = interpreter_subquery->execute().pipeline; - SizeLimits limites(context->getSettingsRef().max_rows_in_set, context->getSettingsRef().max_bytes_in_set, OverflowMode::THROW); - Set set(limites, true, context->getSettingsRef().transform_null_in); - set.setHeader(pipeline.getHeader().getColumnsWithTypeAndName()); + const IColumn * column = value.get(); + if (const auto * column_const = typeid_cast<const ColumnConst *>(column)) + column = &column_const->getDataColumn(); - PullingPipelineExecutor executor(pipeline); - Block block; - while (executor.pull(block)) - { - set.insertFromBlock(block.getColumnsWithTypeAndName()); - } - set.finishInsert(); + const ColumnSet * column_set = typeid_cast<const ColumnSet *>(column); + if (!column_set) + return; - set.checkColumnsNumber(1); - const auto & set_column = *set.getSetElements()[0]; - for (size_t row = 0; row < set_column.size(); ++row) - res.emplace_back(set_column[row].safeGet<String>(), ZkPathType::Exact); - } - else - { - auto evaluated = evaluateConstantExpressionAsLiteral(value, context); - const auto * literal = evaluated->as<ASTLiteral>(); - if (!literal) - return false; + auto set = column_set->getData(); + if (!set->isCreated()) + return; - if (String str; literal->value.tryGet(str)) - { - res.emplace_back(str, ZkPathType::Exact); - } - else if (Tuple tuple; literal->value.tryGet(tuple)) - { - for (auto element : tuple) - res.emplace_back(element.safeGet<String>(), ZkPathType::Exact); - } - else - return false; - } + if (!set->hasExplicitSetElements()) + return; - return true; + set->checkColumnsNumber(1); + auto type = set->getElementsTypes()[0]; + if (!isString(removeNullable(removeLowCardinality(type)))) + return; + + auto values = 
set->getSetElements()[0]; + size_t size = values->size(); + + for (size_t row = 0; row < size; ++row) + res.emplace_back(values->getDataAt(row).toString(), ZkPathType::Exact); } - else if (function->name == "equals") + else if (function_name == "equals") { - const ASTIdentifier * ident; - ASTPtr value; - if ((ident = args.children.at(0)->as())) - value = args.children.at(1); - else if ((ident = args.children.at(1)->as())) - value = args.children.at(0); - else - return false; + if (!isPathNode(node.children.at(0))) + return; - if (ident->name() != "path") - return false; + auto value = node.children.at(1); + if (!value->column) + return; - auto evaluated = evaluateConstantExpressionAsLiteral(value, context); - const auto * literal = evaluated->as(); - if (!literal) - return false; + if (!isString(removeNullable(removeLowCardinality(value->result_type)))) + return; - if (literal->value.getType() != Field::Types::String) - return false; + if (value->column->size() != 1) + return; - res.emplace_back(literal->value.safeGet(), ZkPathType::Exact); - return true; + res.emplace_back(value->column->getDataAt(0).toString(), ZkPathType::Exact); } - else if (allow_unrestricted && function->name == "like") + else if (allow_unrestricted && function_name == "like") { - const ASTIdentifier * ident; - ASTPtr value; - if ((ident = args.children.at(0)->as())) - value = args.children.at(1); - else if ((ident = args.children.at(1)->as())) - value = args.children.at(0); - else - return false; + if (!isPathNode(node.children.at(0))) + return; - if (ident->name() != "path") - return false; + auto value = node.children.at(1); + if (!value->column) + return; - auto evaluated = evaluateConstantExpressionAsLiteral(value, context); - const auto * literal = evaluated->as(); - if (!literal) - return false; + if (!isString(removeNullable(removeLowCardinality(value->result_type)))) + return; - if (literal->value.getType() != Field::Types::String) - return false; + if (value->column->size() != 1) + return; - String pattern = literal->value.safeGet(); + String pattern = value->column->getDataAt(0).toString(); bool has_metasymbol = false; String prefix; // pattern prefix before the first metasymbol occurrence for (size_t i = 0; i < pattern.size(); i++) @@ -376,30 +392,28 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context, } res.emplace_back(prefix, has_metasymbol ? ZkPathType::Prefix : ZkPathType::Exact); - - return true; } - - return false; } /** Retrieve from the query a condition of the form `path = 'path'`, from conjunctions in the WHERE clause. */ -static Paths extractPath(const ASTPtr & query, ContextPtr context, bool allow_unrestricted) +static Paths extractPath(const ActionsDAG::NodeRawConstPtrs & filter_nodes, ContextPtr context, bool allow_unrestricted) { - const auto & select = query->as(); - if (!select.where()) - return allow_unrestricted ? Paths{{"/", ZkPathType::Recurse}} : Paths(); - Paths res; - return extractPathImpl(*select.where(), res, context, allow_unrestricted) ? 
res : Paths(); + for (const auto * node : filter_nodes) + extractPathImpl(*node, res, context, allow_unrestricted); + + if (filter_nodes.empty() && allow_unrestricted) + res.emplace_back("/", ZkPathType::Recurse); + + return res; } -void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const +void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) const { - Paths paths = extractPath(query_info.query, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); + Paths paths = extractPath(getFilterNodes().nodes, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); @@ -486,5 +500,26 @@ } } +ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, ContextPtr context_) + : SourceStepWithFilter({.header = std::move(header)}) + , storage_limits(query_info.storage_limits) + , context(std::move(context_)) +{ +} + +void ReadFromSystemZooKeeper::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + const auto & header = getOutputStream().header; + MutableColumns res_columns = header.cloneEmptyColumns(); + fillData(res_columns); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + auto source = std::make_shared<SourceFromSingleChunk>(header, std::move(chunk)); + source->setStorageLimits(storage_limits); + processors.emplace_back(source); + pipeline.init(Pipe(std::move(source))); +} } diff --git a/src/Storages/System/StorageSystemZooKeeper.h b/src/Storages/System/StorageSystemZooKeeper.h index 20ad29af481..c8988d787a0 100644 --- a/src/Storages/System/StorageSystemZooKeeper.h +++ b/src/Storages/System/StorageSystemZooKeeper.h @@ -11,7 +11,7 @@ class Context; /** Implements `zookeeper` system table, which allows you to view the data in ZooKeeper for debugging purposes. 
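  * Example (the path value here is illustrative): SELECT * FROM system.zookeeper WHERE path = '/clickhouse'. A condition on the path column (equality or IN) is required unless allow_unrestricted_reads_from_keeper is enabled.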
*/ -class StorageSystemZooKeeper final : public IStorageSystemOneBlock +class StorageSystemZooKeeper final : public IStorage { public: explicit StorageSystemZooKeeper(const StorageID & table_id_); @@ -22,10 +22,19 @@ public: SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/) override; -protected: - using IStorageSystemOneBlock::IStorageSystemOneBlock; + void read( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + size_t /*num_streams*/) override; - void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; + bool isSystemStorage() const override { return true; } + bool supportsIndexForIn() const override { return true; } + bool mayBenefitFromIndexForIn(const ASTPtr & node, ContextPtr, const StorageMetadataPtr &) const override; }; } From 54180851ab2fd62b3dc78d3cee09734cda4cfa9d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 6 Apr 2023 13:13:17 +0000 Subject: [PATCH 167/277] Fix name --- .../Optimizations/optimizeUseAggregateProjection.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 61e6988ded1..09c157a0283 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -140,15 +140,15 @@ std::optional matchAggregateFunctions( size_t num_args = aggregate.argument_names.size(); - DataTypes argumen_types; - argumen_types.reserve(num_args); + DataTypes argument_types; + argument_types.reserve(num_args); auto & candidates = it->second; bool found_match = false; for (size_t idx : candidates) { - argumen_types.clear(); + argument_types.clear(); const auto & candidate = info.aggregates[idx]; /// Note: this check is a bit strict. 
@@ -226,7 +226,7 @@ std::optional matchAggregateFunctions( break; } - argumen_types.push_back(query_node->result_type); + argument_types.push_back(query_node->result_type); ++next_arg; } @@ -234,7 +234,7 @@ std::optional matchAggregateFunctions( continue; found_match = true; - res.push_back({&candidate, std::move(argumen_types)}); + res.push_back({&candidate, std::move(argument_types)}); break; } From 2d335f82cc8ea3dc18f6d05778de1b19c63aec01 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 6 Apr 2023 15:43:16 +0200 Subject: [PATCH 168/277] Fix another test --- src/Storages/RabbitMQ/RabbitMQConsumer.cpp | 3 +++ src/Storages/RabbitMQ/RabbitMQConsumer.h | 2 ++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 3 +++ 3 files changed, 8 insertions(+) diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index 835cf82b246..65063e004a5 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -38,7 +38,10 @@ void RabbitMQConsumer::shutdown() { stopped = true; cv.notify_one(); +} +void RabbitMQConsumer::closeConnections() +{ if (consumer_channel) consumer_channel->close(); } diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index 89e2b192c35..c7adb856212 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -84,6 +84,8 @@ public: cv.wait_for(lock, std::chrono::milliseconds(*timeout_ms), [this]{ return !received.empty() || isConsumerStopped(); }); } + void closeConnections(); + private: void subscribe(); void iterateEventLoop(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 0249e3203c6..7999d4af71a 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -837,6 +837,9 @@ void StorageRabbitMQ::shutdown() /// Just a paranoid try catch, it is not actually needed. try { + for (auto & consumer : consumers_ref) + consumer.lock()->closeConnections(); + if (drop_table) cleanupRabbitMQ(); From e91c8bfea3903d0cb4ab294721d9a598c9348e24 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Apr 2023 14:43:52 +0200 Subject: [PATCH 169/277] Fix deadlock due to debug tracking of memory allocations Since memory tracker should be blocked globally (by default it is blocked only for VariableContext::User).
stacktrace * thread 11, name = 'Collector', stop reason = trace frame 1: 0x00007ffff7e37ea2 libc.so.6`pthread_mutex_lock + 274 frame 6: 0x000000001ee10298 clickhouse`toStringCached(pointers=0x00007fffedfef068, offset=0, size=45) + 88 at StackTrace.cpp:425 frame 7: 0x000000001ee1128e clickhouse`StackTrace::toString(this=0x00007fffedfef058) const + 46 at StackTrace.cpp:444 frame 8: 0x000000001ede58ff clickhouse`(anonymous namespace)::debugLogBigAllocationWithoutCheck(size=25187648) + 479 at MemoryTracker.cpp:98 frame 9: 0x000000001ede5481 clickhouse`MemoryTracker::allocImpl(this=0x0000000030729e68, size=25187648, throw_if_memory_exceeded=false, query_tracker=0x00007ffff3c22440) + 3937 at MemoryTracker.cpp:352 frame 10: 0x000000001ede477f clickhouse`MemoryTracker::allocImpl(this=0x00007ffff3c1e530, size=25187648, throw_if_memory_exceeded=false, query_tracker=0x00007ffff3c22440) + 607 at MemoryTracker.cpp:198 frame 11: 0x000000001ede477f clickhouse`MemoryTracker::allocImpl(this=0x00007ffff3c22440, size=25187648, throw_if_memory_exceeded=false, query_tracker=0x0000000000000000) + 607 at MemoryTracker.cpp:198 frame 12: 0x000000001ede477f clickhouse`MemoryTracker::allocImpl(this=0x00007fffedff3bf0, size=25187648, throw_if_memory_exceeded=false, query_tracker=0x0000000000000000) + 607 at MemoryTracker.cpp:198 frame 13: 0x000000001ed6c4c9 clickhouse`CurrentMemoryTracker::allocImpl(size=25165824, throw_if_memory_exceeded=false) + 265 at CurrentMemoryTracker.cpp:58 frame 14: 0x000000001ed6c6a1 clickhouse`CurrentMemoryTracker::allocNoThrow(size=25165824) + 33 at CurrentMemoryTracker.cpp:91 frame 15: 0x000000001ed63079 clickhouse`operator new(unsigned long) [inlined] void Memory::trackMemory<>(size=25165824) + 89 at memory.h:177 frame 16: 0x000000001ed63034 clickhouse`operator new(size=25165824) + 20 at new_delete.cpp:74 ... 
frame 27: 0x000000001ee15895 clickhouse`DB::(anonymous namespace)::collectSymbols(info=0x00007ffff7062000, (null)=, data_ptr=0x00007fffec2b7000) + 53 at SymbolIndex.cpp:483 frame 28: 0x000000002e1a5d57 clickhouse`::dl_iterate_phdr(callback=(clickhouse`DB::(anonymous namespace)::collectSymbols(dl_phdr_info*, unsigned long, void*) at SymbolIndex.cpp:480), data=0x00007fffec2b7000)(dl_phdr_info *, size_t, void *), void *) + 183 at phdr_cache.cpp:76 frame 29: 0x000000001ee15549 clickhouse`DB::SymbolIndex::update(this=0x00007fffec2b7000) + 41 at SymbolIndex.cpp:515 frame 30: 0x000000001ee1be35 clickhouse`DB::SymbolIndex::SymbolIndex(this=) + 53 at SymbolIndex.h:23 frame 31: 0x000000001ee17a6c clickhouse`DB::SymbolIndex::instanceImpl() + 108 at SymbolIndex.cpp:555 frame 32: 0x000000001ee17b4a clickhouse`DB::SymbolIndex::instance() + 10 at SymbolIndex.cpp:561 frame 33: 0x000000001ee1070f clickhouse`void toStringEveryLineImpl const&, unsigned long, unsigned long)::$_0>(fatal=false, stack_trace=0x00007fffedff1ef8, callback=0x00007fffedff1ee0)::$_0&&) + 207 at StackTrace.cpp:349 frame 34: 0x000000001ee1040b clickhouse`toStringCached(pointers=0x00007fffedff2928, offset=0, size=43) + 459 at StackTrace.cpp:436 frame 35: 0x000000001ee1128e clickhouse`StackTrace::toString(this=0x00007fffedff2918) const + 46 at StackTrace.cpp:444 frame 36: 0x000000001ede58ff clickhouse`(anonymous namespace)::debugLogBigAllocationWithoutCheck(size=25165824) + 479 at MemoryTracker.cpp:98 frame 37: 0x000000001ede5481 clickhouse`MemoryTracker::allocImpl(this=0x00007fffedff3bf0, size=25165824, throw_if_memory_exceeded=false, query_tracker=0x0000000000000000) + 3937 at MemoryTracker.cpp:352 frame 38: 0x000000001ed6c4c9 clickhouse`CurrentMemoryTracker::allocImpl(size=25165824, throw_if_memory_exceeded=false) + 265 at CurrentMemoryTracker.cpp:58 frame 39: 0x000000001ed6c6a1 clickhouse`CurrentMemoryTracker::allocNoThrow(size=25165824) + 33 at CurrentMemoryTracker.cpp:91 frame 40: 0x000000001ed63079 clickhouse`operator new(unsigned long) [inlined] void Memory::trackMemory<>(size=25165824) + 89 at memory.h:177 frame 41: 0x000000001ed63034 clickhouse`operator new(size=25165824) + 20 at new_delete.cpp:74 ... frame 55: 0x0000000025badace clickhouse`DB::WriteBufferFromS3::nextImpl(this=0x00007ffff3c58f00) + 366 at WriteBufferFromS3.cpp:110
Signed-off-by: Azat Khuzhin --- src/Common/MemoryTracker.cpp | 40 ++++++++++++++++++------------------ src/Common/MemoryTracker.h | 2 ++ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index ca5c4a745cd..674d8d469af 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -82,26 +82,6 @@ inline std::string_view toDescription(OvercommitResult result) } } -inline void debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]]) -{ - /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). - /// Let's find them. -#ifdef ABORT_ON_LOGICAL_ERROR - if (size < 0) - return; - - constexpr Int64 threshold = 16 * 1024 * 1024; /// The choice is arbitrary (maybe we should decrease it) - if (size < threshold) - return; - - MemoryTrackerBlockerInThread blocker; - LOG_TEST(&Poco::Logger::get("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, " - "it may lead to OOM. Stack trace: {}", size, StackTrace().toString()); -#else - return; /// Avoid trash logging in release builds -#endif -} - } namespace ProfileEvents @@ -175,6 +155,26 @@ void MemoryTracker::injectFault() const description ? description : ""); } +void MemoryTracker::debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]]) +{ + /// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug). + /// Let's find them. +#ifdef ABORT_ON_LOGICAL_ERROR + if (size < 0) + return; + + constexpr Int64 threshold = 16 * 1024 * 1024; /// The choice is arbitrary (maybe we should decrease it) + if (size < threshold) + return; + + MemoryTrackerBlockerInThread blocker(VariableContext::Global); + LOG_TEST(&Poco::Logger::get("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, " + "it may lead to OOM. Stack trace: {}", size, StackTrace().toString()); +#else + return; /// Avoid trash logging in release builds +#endif +} + void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker) { if (size < 0) diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 66b56730b75..0d7748856bd 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -215,6 +215,8 @@ public: /// Prints info about peak memory consumption into log. 
void logPeakMemoryUsage(); + + void debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]]); }; extern MemoryTracker total_memory_tracker; From cfd9c4d85e26c2a77280582bcc5405a5c9d3d9cb Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Apr 2023 14:01:05 +0000 Subject: [PATCH 170/277] Register aliases of date_diff in system.functions --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- src/Functions/dateDiff.cpp | 5 +++++ .../queries/0_stateless/25342_date_diff_aliases.reference | 5 +++++ tests/queries/0_stateless/25342_date_diff_aliases.sql | 7 +++++++ 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/25342_date_diff_aliases.reference create mode 100644 tests/queries/0_stateless/25342_date_diff_aliases.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 74ef9a28dc1..42a16b7185f 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -645,7 +645,7 @@ For an alternative to `date\_diff`, see function `age`. date_diff('unit', startdate, enddate, [timezone]) ``` -Aliases: `dateDiff`, `DATE_DIFF`. +Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`. **Arguments** diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 457b77b9843..b28c97d45b2 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -448,6 +448,11 @@ private: REGISTER_FUNCTION(DateDiff) { factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerAlias("date_diff", FunctionDateDiff::name); + factory.registerAlias("DATE_DIFF", FunctionDateDiff::name); + factory.registerAlias("timestampDiff", FunctionDateDiff::name); + factory.registerAlias("timestamp_diff", FunctionDateDiff::name); + factory.registerAlias("TIMESTAMP_DIFF", FunctionDateDiff::name); } REGISTER_FUNCTION(TimeDiff) diff --git a/tests/queries/0_stateless/25342_date_diff_aliases.reference b/tests/queries/0_stateless/25342_date_diff_aliases.reference new file mode 100644 index 00000000000..1eeb5a3a2fa --- /dev/null +++ b/tests/queries/0_stateless/25342_date_diff_aliases.reference @@ -0,0 +1,5 @@ +DATE_DIFF +TIMESTAMP_DIFF +date_diff +timestampDiff +timestamp_diff diff --git a/tests/queries/0_stateless/25342_date_diff_aliases.sql b/tests/queries/0_stateless/25342_date_diff_aliases.sql new file mode 100644 index 00000000000..c6b31c44f95 --- /dev/null +++ b/tests/queries/0_stateless/25342_date_diff_aliases.sql @@ -0,0 +1,7 @@ +SELECT name FROM system.functions +WHERE name = 'date_diff' + OR name = 'DATE_DIFF' + OR name = 'timestampDiff' + OR name = 'timestamp_diff' + OR name = 'TIMESTAMP_DIFF' +ORDER BY name; From e88938d6c8f21b3fe4a3e5d3cf2bbc0c18248d83 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 6 Apr 2023 16:17:45 +0200 Subject: [PATCH 171/277] Change error code. 
--- src/DataTypes/ObjectUtils.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index edda0235bcc..28f000b6f0d 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; extern const int INCOMPATIBLE_COLUMNS; + extern const int NOT_IMPLEMENTED; } size_t getNumberOfDimensions(const IDataType & type) @@ -121,7 +122,7 @@ DataTypePtr getDataTypeByColumn(const IColumn & column) return makeNullable(getDataTypeByColumn(column_nullable->getNestedColumn())); /// TODO: add more types. - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get data type of column {}", column.getFamilyName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get data type of column {}", column.getFamilyName()); } template From 7bcb8c4970c37a4f6b5001454636effe7684d4f3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Apr 2023 14:25:27 +0000 Subject: [PATCH 172/277] Register trim aliases in system.functions --- src/Functions/trim.cpp | 3 +++ tests/queries/0_stateless/25343_trim_aliases.reference | 3 +++ tests/queries/0_stateless/25343_trim_aliases.sql | 5 +++++ 3 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/25343_trim_aliases.reference create mode 100644 tests/queries/0_stateless/25343_trim_aliases.sql diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index acfab47a68b..dd51c606ff7 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -112,5 +112,8 @@ REGISTER_FUNCTION(Trim) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerAlias("ltrim", FunctionTrimLeft::name); + factory.registerAlias("rtrim", FunctionTrimRight::name); + factory.registerAlias("trim", FunctionTrimBoth::name); } } diff --git a/tests/queries/0_stateless/25343_trim_aliases.reference b/tests/queries/0_stateless/25343_trim_aliases.reference new file mode 100644 index 00000000000..fa0920cd079 --- /dev/null +++ b/tests/queries/0_stateless/25343_trim_aliases.reference @@ -0,0 +1,3 @@ +ltrim +rtrim +trim diff --git a/tests/queries/0_stateless/25343_trim_aliases.sql b/tests/queries/0_stateless/25343_trim_aliases.sql new file mode 100644 index 00000000000..d0d739805fd --- /dev/null +++ b/tests/queries/0_stateless/25343_trim_aliases.sql @@ -0,0 +1,5 @@ +SELECT name FROM system.functions +WHERE name = 'ltrim' + OR name = 'rtrim' + OR name = 'trim' +ORDER BY name; From 851fdf7f4691eeb399ddcc5edbde70ffe9a79c4c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Apr 2023 18:55:36 +0300 Subject: [PATCH 173/277] Update 00002_log_and_exception_messages_formatting.sql --- .../0_stateless/00002_log_and_exception_messages_formatting.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 0638c50ec69..480effec065 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -59,7 +59,7 @@ select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_st select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Same 
as above, but exceptions must be more informative. Feel free to update the threshold or remove this query if really necessary -select 'exceptions shorter than 30', max2(countDistinctOrDefault(message_format_string), 27) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%' and message_format_string not in known_short_messages; +select 'exceptions shorter than 30', max2(countDistinctOrDefault(message_format_string), 30) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%' and message_format_string not in known_short_messages; -- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold From 8e1d9939f78b2b91b65a1ffd616e6db7ef571d18 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Apr 2023 18:56:10 +0300 Subject: [PATCH 174/277] Update 00002_log_and_exception_messages_formatting.reference --- .../00002_log_and_exception_messages_formatting.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index 1e7b85d6489..cd9da983785 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -2,7 +2,7 @@ runtime messages 0.001 runtime exceptions 0.05 messages shorter than 10 1 messages shorter than 16 3 -exceptions shorter than 30 27 +exceptions shorter than 30 30 noisy messages 0.3 noisy Trace messages 0.16 noisy Debug messages 0.09 From 4757d76fbf398f4ba6985ce07e2782f5a8abebf9 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 6 Apr 2023 15:59:13 +0000 Subject: [PATCH 175/277] fix query cache with sparse columns --- src/Interpreters/Cache/QueryCache.cpp | 3 ++- ...02708_query_cache_sparse_columns.reference | 1 + .../02708_query_cache_sparse_columns.sql | 23 +++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02708_query_cache_sparse_columns.reference create mode 100644 tests/queries/0_stateless/02708_query_cache_sparse_columns.sql diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index ba5388847f5..fede871aba3 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -242,8 +242,9 @@ void QueryCache::Writer::finalizeWrite() Chunks squashed_chunks; size_t rows_remaining_in_squashed = 0; /// how many further rows can the last squashed chunk consume until it reaches max_block_size - for (const auto & chunk : *query_result) + for (auto & chunk : *query_result) { + convertToFullIfSparse(chunk); const size_t rows_chunk = chunk.getNumRows(); size_t rows_chunk_processed = 0; diff --git a/tests/queries/0_stateless/02708_query_cache_sparse_columns.reference b/tests/queries/0_stateless/02708_query_cache_sparse_columns.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02708_query_cache_sparse_columns.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02708_query_cache_sparse_columns.sql b/tests/queries/0_stateless/02708_query_cache_sparse_columns.sql new file mode 100644 index 00000000000..952c3227852 --- /dev/null +++ b/tests/queries/0_stateless/02708_query_cache_sparse_columns.sql @@ -0,0 +1,23 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS t_cache_sparse; +SYSTEM DROP 
QUERY CACHE; + +CREATE TABLE t_cache_sparse (id UInt64, v UInt64) +ENGINE = MergeTree ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_cache_sparse; + +INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000); +INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000); + +SET allow_experimental_query_cache = 1; +SET use_query_cache = 1; +SET max_threads = 1; + +SELECT v FROM t_cache_sparse FORMAT Null; +SELECT v FROM t_cache_sparse FORMAT Null; +SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%'; + +DROP TABLE t_cache_sparse; From fae4906e112b12d67636028df13983e6a609915f Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 6 Apr 2023 12:07:00 -0400 Subject: [PATCH 176/277] Update docs/en/sql-reference/statements/undrop.md Co-authored-by: Alexander Tokmakov --- docs/en/sql-reference/statements/undrop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index e56829e7151..89208b3aaba 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -8,7 +8,7 @@ sidebar_label: UNDROP Cancels the dropping of the table. Beginning with ClickHouse version 23.3 it is possible to UNDROP a table -within 8 minutes of issuing the DROP TABLE statement. Dropped tables are listed in +within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in a system table called `system.dropped_tables`. :::note From 28f86e7b841ec531f279c8c5d4943fcd118b19be Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 6 Apr 2023 12:07:53 -0400 Subject: [PATCH 177/277] Update docs/en/sql-reference/statements/undrop.md Co-authored-by: Alexander Tokmakov --- docs/en/sql-reference/statements/undrop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index 89208b3aaba..7f42d92d923 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -25,7 +25,7 @@ Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md) Syntax: ``` sql -UNDROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC] +UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster] ``` **Example** From 66af848f8250a636a1b37b3ef614d20ddebf81ad Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 6 Apr 2023 12:09:34 -0400 Subject: [PATCH 178/277] Update docs/en/sql-reference/statements/undrop.md --- docs/en/sql-reference/statements/undrop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index 7f42d92d923..0822268f908 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -7,7 +7,7 @@ sidebar_label: UNDROP Cancels the dropping of the table. -Beginning with ClickHouse version 23.3 it is possible to UNDROP a table +Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an Atomic database within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in a system table called `system.dropped_tables`. 
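Taken together, the three documentation patches above describe the full recovery workflow. The sketch below is illustrative rather than quoted from the docs: the table name is invented, and it assumes the experimental flag `allow_experimental_undrop_table_query` that the note in the patched page enables.

```sql
-- Hedged sketch of the documented UNDROP workflow; table name is invented.
SET allow_experimental_undrop_table_query = 1;

CREATE TABLE undrop_demo (id UInt64) ENGINE = MergeTree ORDER BY id;
DROP TABLE undrop_demo;

-- Until database_atomic_delay_before_drop_table_sec (8 minutes by default)
-- elapses, the dropped table is still listed and can be recovered.
SELECT table, engine FROM system.dropped_tables WHERE table = 'undrop_demo';

UNDROP TABLE undrop_demo;
SELECT count() FROM undrop_demo;
```

Patch 179, which follows, adds the related caveat: a materialized view without a TO clause keeps its data in an inner table, and that inner table has to be undropped separately.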
From cbda8762b2a4a29debe9aadb9239c59961fb36c5 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 6 Apr 2023 12:18:09 -0400 Subject: [PATCH 179/277] add note about MV --- docs/en/sql-reference/statements/undrop.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md index 0822268f908..40ac1ab4f99 100644 --- a/docs/en/sql-reference/statements/undrop.md +++ b/docs/en/sql-reference/statements/undrop.md @@ -11,6 +11,8 @@ Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an At within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in a system table called `system.dropped_tables`. +If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view. + :::note UNDROP TABLE is experimental. To use it add this setting: ```sql From b6975d36e9e1eefbe9d8722d1bea6837a74e8e86 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 6 Apr 2023 19:05:32 +0200 Subject: [PATCH 180/277] Fix bytesSize() of zk SetRequest --- src/Common/ZooKeeper/IKeeper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index a94e367cd70..172714fe04f 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -273,7 +273,7 @@ struct SetRequest : virtual Request void addRootPath(const String & root_path) override; String getPath() const override { return path; } - size_t bytesSize() const override { return data.size() + data.size() + sizeof(version); } + size_t bytesSize() const override { return path.size() + data.size() + sizeof(version); } }; struct SetResponse : virtual Response From 4544abc7d6ae98d06b8fc11b1274fe949362641d Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:37:12 -0700 Subject: [PATCH 181/277] Remove dead code and unused dependencies --- src/Bridge/CMakeLists.txt | 2 +- src/Bridge/IBridge.cpp | 5 ----- src/Dictionaries/CMakeLists.txt | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/src/Bridge/CMakeLists.txt b/src/Bridge/CMakeLists.txt index daf38bd6cbc..5f0e97fc630 100644 --- a/src/Bridge/CMakeLists.txt +++ b/src/Bridge/CMakeLists.txt @@ -2,4 +2,4 @@ add_library (bridge IBridge.cpp ) -target_link_libraries (bridge PRIVATE daemon dbms Poco::Data Poco::Data::ODBC) +target_link_libraries (bridge PRIVATE daemon dbms) diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 1ea77573e5f..0f0efbd5eed 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -20,11 +20,6 @@ #include "config.h" -#if USE_ODBC -# include -#endif - - namespace DB { diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index 0260804ab5b..c9dd554a6f1 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -26,7 +26,6 @@ target_link_libraries(clickhouse_dictionaries clickhouse_common_io dbms Poco::Data - Poco::Data::ODBC Poco::MongoDB Poco::Redis string_utils From 00c8e6e60d6f69ec7c9abd14f83dfad4711ce4a4 Mon Sep 17 00:00:00 2001 From: ltrk2 <107155950+ltrk2@users.noreply.github.com> Date: Thu, 6 Apr 2023 11:39:33 -0700 Subject: [PATCH 182/277] Use std::string_view instead of strlen --- src/Functions/extractTextFromHTML.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp
index a15611579bb..c7ef97c7bee 100644
--- a/src/Functions/extractTextFromHTML.cpp
+++ b/src/Functions/extractTextFromHTML.cpp
@@ -70,16 +70,16 @@ namespace ErrorCodes
 namespace
 {
 
-inline bool startsWith(const char * s, const char * end, const char * prefix)
+bool startsWith(const char * s, const char * end, const std::string_view prefix)
 {
-    return s + strlen(prefix) < end && 0 == memcmp(s, prefix, strlen(prefix));
+    return s + prefix.length() < end && 0 == memcmp(s, prefix.data(), prefix.length());
 }
 
-inline bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix)
+inline bool checkAndSkip(const char * __restrict & s, const char * end, const std::string_view prefix)
 {
     if (startsWith(s, end, prefix))
     {
-        s += strlen(prefix);
+        s += prefix.length();
         return true;
     }
     return false;
@@ -138,7 +138,7 @@ bool processCDATA(const char * __restrict & src, const char * end, char * __rest
     return true;
 }
 
-bool processElementAndSkipContent(const char * __restrict & src, const char * end, const char * tag_name)
+bool processElementAndSkipContent(const char * __restrict & src, const char * end, const std::string_view tag_name)
 {
     const auto * old_src = src;
 
From 790e570af6c37507d21527c0b5ae0f67d8e474b8 Mon Sep 17 00:00:00 2001
From: ltrk2 <107155950+ltrk2@users.noreply.github.com>
Date: Thu, 6 Apr 2023 11:57:44 -0700
Subject: [PATCH 183/277] Use std::string::starts_with instead of a
 roll-your-own variant

---
 src/Common/mysqlxx/PoolFactory.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/Common/mysqlxx/PoolFactory.cpp b/src/Common/mysqlxx/PoolFactory.cpp
index 5fae934a400..9479273e36e 100644
--- a/src/Common/mysqlxx/PoolFactory.cpp
+++ b/src/Common/mysqlxx/PoolFactory.cpp
@@ -23,12 +23,6 @@ PoolWithFailover PoolFactory::get(const std::string & config_name, unsigned defa
     return get(Poco::Util::Application::instance().config(), config_name, default_connections, max_connections, max_tries);
 }
 
-/// Duplicate of code from StringUtils.h. Copied here for less dependencies.
-static bool startsWith(const std::string & s, const char * prefix)
-{
-    return s.size() >= strlen(prefix) && 0 == memcmp(s.data(), prefix, strlen(prefix));
-}
-
 static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & config,
         const std::string & config_name)
 {
@@ -55,7 +49,7 @@ static std::string getPoolEntryName(const Poco::Util::AbstractConfiguration & co
     for (const auto & replica_config_key : replica_keys)
     {
         /// There could be other elements at the same level in the configuration file, like "user", "port"...
-        if (startsWith(replica_config_key, "replica"))
+        if (replica_config_key.starts_with("replica"))
         {
             std::string replica_name = config_name + "." + replica_config_key;
             std::string tmp_host = config.getString(replica_name + ".host", host);
 
From e8fb13518018e7dad2cc1881375c8578c158a62e Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 6 Apr 2023 19:07:53 +0000
Subject: [PATCH 184/277] Reduce memory usage for multiple alter delete
 mutations.
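Combine all of a stage's mutation filters into one conjunction with makeASTForLogicalAnd() instead of appending a separate expression step per filter, so the expression chain no longer grows with the number of pending DELETEs. A sketch of the kind of workload this targets, assuming a hypothetical table `t` (the new test below queues thousands of such mutations):

``` sql
CREATE TABLE t (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY x;
SYSTEM STOP MERGES t;
-- each ALTER queues its own mutation; when applied together, their filters
-- are now analyzed as one combined predicate, roughly
-- NOT (y = 2) AND NOT (y = 4) AND NOT (y = 6)
ALTER TABLE t DELETE WHERE y = 2;
ALTER TABLE t DELETE WHERE y = 4;
ALTER TABLE t DELETE WHERE y = 6;
SYSTEM START MERGES t;
```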
--- src/Interpreters/MutationsInterpreter.cpp | 8 ++- .../02125_many_mutations_2.reference | 4 ++ .../0_stateless/02125_many_mutations_2.sh | 51 +++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02125_many_mutations_2.reference create mode 100755 tests/queries/0_stateless/02125_many_mutations_2.sh diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 0b52a1a51bc..26442f94007 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -38,6 +38,7 @@ #include #include #include +#include namespace DB @@ -965,10 +966,15 @@ void MutationsInterpreter::prepareMutationStages(std::vector & prepared_s ExpressionActionsChain & actions_chain = stage.expressions_chain; - for (const auto & ast : stage.filters) + if (!stage.filters.empty()) { + auto ast = stage.filters.front(); + if (stage.filters.size() > 1) + ast = makeASTForLogicalAnd(std::move(stage.filters)); + if (!actions_chain.steps.empty()) actions_chain.addStep(); + stage.analyzer->appendExpression(actions_chain, ast, dry_run); stage.filter_column_names.push_back(ast->getColumnName()); } diff --git a/tests/queries/0_stateless/02125_many_mutations_2.reference b/tests/queries/0_stateless/02125_many_mutations_2.reference new file mode 100644 index 00000000000..4bdea51dfc1 --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations_2.reference @@ -0,0 +1,4 @@ +2000 +20000 +0 +1000 diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh new file mode 100755 index 00000000000..df170a402c6 --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x" +$CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" +$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" + +$CLICKHOUSE_CLIENT -q "select count() from many_mutations" + +job() +{ + for i in {1..1000} + do + $CLICKHOUSE_CLIENT -q "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync=0" + done +} + +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & + +wait + +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "system start merges many_mutations" +$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" +$CLICKHOUSE_CLIENT -q "system flush logs" +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "select count() from many_mutations" +$CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" From 57255eb56c075c129cbedcc76082c3642560d737 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 6 Apr 2023 19:22:10 +0000 Subject: [PATCH 185/277] Fix equals. 
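Extract the path condition for system.zookeeper from `equals` regardless of which side the `path` column appears on; previously only the `path = <literal>` order was recognized. Both of the sketched queries below (the path literal is illustrative) are now treated the same when collecting the ZooKeeper paths to read:

``` sql
SELECT name FROM system.zookeeper WHERE path = '/clickhouse';
SELECT name FROM system.zookeeper WHERE '/clickhouse' = path;
```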
--- src/Storages/System/StorageSystemZooKeeper.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp
index ee33253e40f..c96c8ec60ae 100644
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@@ -333,11 +333,14 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP
     }
     else if (function_name == "equals")
     {
-        if (!isPathNode(node.children.at(0)))
-            return;
+        const ActionsDAG::Node * value = nullptr;
 
-        auto value = node.children.at(1);
-        if (!value->column)
+        if (isPathNode(node.children.at(0)))
+            value = node.children.at(1);
+        else if (isPathNode(node.children.at(1)))
+            value = node.children.at(0);
+
+        if (!value || !value->column)
             return;
 
         if (!isString(removeNullable(removeLowCardinality(value->result_type))))

From b347464496c04f52218fa5fc6b56229c4da5e381 Mon Sep 17 00:00:00 2001
From: Justin de Guzman
Date: Thu, 6 Apr 2023 13:01:34 -0700
Subject: [PATCH 186/277] Update ClickHouse logo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 61d840ecd34..cd10c29b386 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com)
+[![ClickHouse — open source distributed column-oriented DBMS](https://clickhouse.com/images/clickhouse_gh_logo_400.png)](https://clickhouse.com)
 
 ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
 
From 4449e49e725c0a27b9059d3844f10cd8f9109da4 Mon Sep 17 00:00:00 2001
From: ltrk2 <107155950+ltrk2@users.noreply.github.com>
Date: Thu, 6 Apr 2023 13:28:30 -0700
Subject: [PATCH 187/277] Mark a function inline

---
 src/Functions/extractTextFromHTML.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp
index c7ef97c7bee..4eefeaa9f86 100644
--- a/src/Functions/extractTextFromHTML.cpp
+++ b/src/Functions/extractTextFromHTML.cpp
@@ -70,7 +70,7 @@ namespace ErrorCodes
 namespace
 {
 
-bool startsWith(const char * s, const char * end, const std::string_view prefix)
+inline bool startsWith(const char * s, const char * end, const std::string_view prefix)
 {
     return s + prefix.length() < end && 0 == memcmp(s, prefix.data(), prefix.length());
 }
 
From 09e8a321fc361df604818104e12eccea9be3869f Mon Sep 17 00:00:00 2001
From: Justin de Guzman
Date: Thu, 6 Apr 2023 14:44:29 -0700
Subject: [PATCH 188/277] Change ClickHouse logo to dark mode

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cd10c29b386..cee3a945262 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![ClickHouse — open source distributed column-oriented DBMS](https://clickhouse.com/images/clickhouse_gh_logo_400.png)](https://clickhouse.com)
+[ClickHouse — open source distributed column-oriented DBMS](https://clickhouse.com?utm_source=github)
 
 ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real time.
From 2e139c21d25f883626e1dbd2a5b8202fabe9297e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Apr 2023 21:57:03 +0000 Subject: [PATCH 189/277] Parallel reading in FROM file() --- src/Processors/ResizeProcessor.h | 2 +- src/Storages/StorageFile.cpp | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Processors/ResizeProcessor.h b/src/Processors/ResizeProcessor.h index 07d7149ebb4..766c39172a2 100644 --- a/src/Processors/ResizeProcessor.h +++ b/src/Processors/ResizeProcessor.h @@ -10,7 +10,7 @@ namespace DB /** Has arbitrary non zero number of inputs and arbitrary non zero number of outputs. * All of them have the same structure. * - * Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever is is not full). + * Pulls data from arbitrary input (whenever it is ready) and pushes it to arbitrary output (whenever it is not full). * Doesn't do any heavy calculations. * Doesn't preserve an order of data. * diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 5fd5664b9e6..84f030ce90e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -40,6 +40,7 @@ #include #include #include +#include "Processors/ResizeProcessor.h" #include #include @@ -700,7 +701,7 @@ Pipe StorageFile::read( ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, - size_t num_streams) + const size_t max_num_streams) { if (use_table_fd) { @@ -731,7 +732,8 @@ Pipe StorageFile::read( auto this_ptr = std::static_pointer_cast(shared_from_this()); - if (num_streams > paths.size()) + size_t num_streams = max_num_streams; + if (max_num_streams > paths.size()) num_streams = paths.size(); Pipes pipes; @@ -789,7 +791,14 @@ Pipe StorageFile::read( std::move(read_buffer))); } - return Pipe::unitePipes(std::move(pipes)); + Pipe pipe = Pipe::unitePipes(std::move(pipes)); + /// parallelize output as much as possible + if (num_streams < max_num_streams) + { + pipe.addTransform(std::make_shared(pipe.getHeader(), num_streams, max_num_streams)); + } + + return pipe; } From 96213fa464f31a5d70683f438e2af5442533b742 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 6 Apr 2023 22:17:09 +0000 Subject: [PATCH 190/277] Fix header --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 84f030ce90e..f8b5ea0ced7 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -34,13 +34,13 @@ #include #include #include +#include #include #include #include #include #include -#include "Processors/ResizeProcessor.h" #include #include From 343a07179892efddc48488d6b211dad0fed866a8 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Apr 2023 22:38:58 +0000 Subject: [PATCH 191/277] add `lost_part_count` column to `system.replicas` --- .../MergeTree/ReplicatedTableStatus.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 24 ++++++++++++++++++- src/Storages/System/StorageSystemReplicas.cpp | 3 +++ .../02117_show_create_table_system.reference | 1 + 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h index b9f84091e9b..46e971f562a 100644 --- a/src/Storages/MergeTree/ReplicatedTableStatus.h +++ b/src/Storages/MergeTree/ReplicatedTableStatus.h @@ -25,6 +25,7 @@ struct ReplicatedTableStatus UInt64 absolute_delay; UInt8 total_replicas; UInt8 active_replicas; + UInt64 
lost_part_count; String last_queue_update_exception; /// If the error has happened fetching the info from ZooKeeper, this field will be set. String zookeeper_exception; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cbfe3f8cab2..600168d2637 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -652,6 +652,8 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/alter_partition_version", String(), zkutil::CreateMode::Persistent)); /// For deduplication of async inserts futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/async_blocks", String(), zkutil::CreateMode::Persistent)); + /// To track "lost forever" parts count, just for `system.replicas` table + futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/lost_part_count", String(), zkutil::CreateMode::Persistent)); /// As for now, "/temp" node must exist, but we want to be able to remove it in future if (zookeeper->exists(zookeeper_path + "/temp")) @@ -5960,6 +5962,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit res.log_pointer = 0; res.total_replicas = 0; res.active_replicas = 0; + res.lost_part_count = 0; res.last_queue_update_exception = getLastQueueUpdateException(); if (with_zk_fields && !res.is_session_expired) @@ -5976,6 +5979,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit paths.clear(); paths.push_back(fs::path(replica_path) / "log_pointer"); + paths.push_back(fs::path(zookeeper_path) / "lost_part_count"); for (const String & replica : all_replicas) paths.push_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); @@ -5993,10 +5997,14 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit res.log_pointer = log_pointer_str.empty() ? 0 : parse(log_pointer_str); res.total_replicas = all_replicas.size(); + if (get_result[1].error == Coordination::Error::ZNONODE) + res.lost_part_count = 0; + else + res.lost_part_count = get_result[1].data.empty() ? 0 : parse(get_result[1].data); for (size_t i = 0, size = all_replicas.size(); i < size; ++i) { - bool is_replica_active = get_result[i + 1].error != Coordination::Error::ZNONODE; + bool is_replica_active = get_result[i + 2].error != Coordination::Error::ZNONODE; res.active_replicas += static_cast(is_replica_active); res.replica_is_active.emplace(all_replicas[i], is_replica_active); } @@ -8862,6 +8870,20 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP getCommitPartOps(ops, new_data_part); + /// Increment lost_part_count + auto lost_part_count_path = fs::path(zookeeper_path) / "lost_part_count"; + Coordination::Stat lost_part_count_stat; + String lost_part_count_str; + if (zookeeper->tryGet(lost_part_count_path, lost_part_count_str, &lost_part_count_stat)) + { + UInt64 lost_part_count = lost_part_count_str.empty() ? 
0 : parse(lost_part_count_str); + ops.emplace_back(zkutil::makeSetRequest(lost_part_count_path, fmt::format("{}", lost_part_count + 1), lost_part_count_stat.version)); + } + else + { + ops.emplace_back(zkutil::makeCreateRequest(lost_part_count_path, "1", zkutil::CreateMode::Persistent)); + } + Coordination::Responses responses; if (auto code = zookeeper->tryMulti(ops, responses); code == Coordination::Error::ZOK) { diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 240d452fe29..1a09c8fb96f 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -59,6 +59,7 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) { "absolute_delay", std::make_shared() }, { "total_replicas", std::make_shared() }, { "active_replicas", std::make_shared() }, + { "lost_part_count", std::make_shared() }, { "last_queue_update_exception", std::make_shared() }, { "zookeeper_exception", std::make_shared() }, { "replica_is_active", std::make_shared(std::make_shared(), std::make_shared()) } @@ -112,6 +113,7 @@ Pipe StorageSystemReplicas::read( || column_name == "log_pointer" || column_name == "total_replicas" || column_name == "active_replicas" + || column_name == "lost_part_count" || column_name == "zookeeper_exception" || column_name == "replica_is_active") { @@ -212,6 +214,7 @@ Pipe StorageSystemReplicas::read( res_columns[col_num++]->insert(status.absolute_delay); res_columns[col_num++]->insert(status.total_replicas); res_columns[col_num++]->insert(status.active_replicas); + res_columns[col_num++]->insert(status.lost_part_count); res_columns[col_num++]->insert(status.last_queue_update_exception); res_columns[col_num++]->insert(status.zookeeper_exception); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index b07d6e01161..c3598788b2c 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -867,6 +867,7 @@ CREATE TABLE system.replicas `absolute_delay` UInt64, `total_replicas` UInt8, `active_replicas` UInt8, + `lost_part_count` UInt64, `last_queue_update_exception` String, `zookeeper_exception` String, `replica_is_active` Map(String, UInt8) From 9976f17e12668346c37482e8acb7901e1d31f117 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Apr 2023 22:54:00 +0000 Subject: [PATCH 192/277] add docs --- docs/en/operations/system-tables/replicas.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index e711d9a7784..15426eefbcc 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08 absolute_delay: 99 total_replicas: 5 active_replicas: 5 +lost_part_count: 0 last_queue_update_exception: zookeeper_exception: replica_is_active: {'r1':1,'r2':1} @@ -90,6 +91,7 @@ The next 4 columns have a non-zero value only where there is an active session w - `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has. - `total_replicas` (`UInt8`) - The total number of known replicas of this table. - `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas). 
+- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase. - `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions. - `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper. - `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active. From 2d07704243fd8f740ec247c92e50553f95526a49 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 6 Apr 2023 23:50:23 +0000 Subject: [PATCH 193/277] fix zk tests --- .../0_stateless/02221_system_zookeeper_unrestricted.reference | 2 ++ .../02221_system_zookeeper_unrestricted_like.reference | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference index 60d3c78d740..53b44764d5c 100644 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference @@ -36,6 +36,8 @@ log log log_pointer log_pointer +lost_part_count +lost_part_count max_processed_insert_time max_processed_insert_time metadata diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference index c59be6a3af5..ccc3064ccbd 100644 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference @@ -17,6 +17,7 @@ leader_election leader_election-0 log log_pointer +lost_part_count max_processed_insert_time metadata metadata @@ -58,6 +59,7 @@ leader_election leader_election-0 log log_pointer +lost_part_count max_processed_insert_time metadata metadata From f3e3117d24e99d148f478a454335d7f84c021d8b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Apr 2023 07:41:05 +0000 Subject: [PATCH 194/277] Fix test --- .../02103_with_names_and_types_parallel_parsing.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh index 487282099e2..a6e704093a2 100755 --- a/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh +++ b/tests/queries/0_stateless/02103_with_names_and_types_parallel_parsing.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02103.data @@ -14,7 +14,7 @@ FORMATS=('TSVWithNames' 'TSVWithNamesAndTypes' 'TSVRawWithNames' 'TSVRawWithName for format in "${FORMATS[@]}" do $CLICKHOUSE_CLIENT -q "SELECT number, range(number + 10) AS array, toString(number) AS string FROM numbers(10) FORMAT $format" > $DATA_FILE - $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" + $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103.data', '$format', 'number UInt64, array Array(UInt64), string String') ORDER BY number SETTINGS input_format_parallel_parsing=1, min_chunk_bytes_for_parallel_parsing=40" done rm $DATA_FILE From 36eabc57d25ff992cf6f1b74f6cafcbc8b76f3db Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 07:49:19 +0000 Subject: [PATCH 195/277] Sync replicas after inserts --- tests/integration/test_alternative_keeper_config/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_alternative_keeper_config/test.py b/tests/integration/test_alternative_keeper_config/test.py index d2cfc4fe25e..2d59d2ee8b9 100644 --- a/tests/integration/test_alternative_keeper_config/test.py +++ b/tests/integration/test_alternative_keeper_config/test.py @@ -59,6 +59,8 @@ def test_create_insert(started_cluster): node2.query("INSERT INTO tbl VALUES (1, 'str1')") # Test deduplication node3.query("INSERT INTO tbl VALUES (2, 'str2')") + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'test_cluster' tbl") + for node in [node1, node2, node3]: expected = [[1, "str1"], [2, "str2"]] assert node.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) From 67e633525060c58a1d067caa2deb8de2869985ff Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 07:17:00 +0000 Subject: [PATCH 196/277] Fix nullptr to memcpy --- src/Common/SpaceSaving.h | 3 +++ .../queries/0_stateless/02710_topk_with_empty_array.reference | 1 + tests/queries/0_stateless/02710_topk_with_empty_array.sql | 1 + 3 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/02710_topk_with_empty_array.reference create mode 100644 tests/queries/0_stateless/02710_topk_with_empty_array.sql diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index c83e836eb83..f5f66e41307 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -51,6 +51,9 @@ struct SpaceSavingArena { StringRef emplace(StringRef key) { + if (!key.data) + return key; + return copyStringInArena(arena, key); } diff --git a/tests/queries/0_stateless/02710_topk_with_empty_array.reference b/tests/queries/0_stateless/02710_topk_with_empty_array.reference new file mode 100644 index 00000000000..17212447ad8 --- /dev/null +++ b/tests/queries/0_stateless/02710_topk_with_empty_array.reference @@ -0,0 +1 @@ +[[]] diff --git a/tests/queries/0_stateless/02710_topk_with_empty_array.sql b/tests/queries/0_stateless/02710_topk_with_empty_array.sql new file mode 100644 index 00000000000..7de066e9ae4 --- /dev/null +++ b/tests/queries/0_stateless/02710_topk_with_empty_array.sql @@ -0,0 
+1 @@ +SELECT topK(emptyArrayInt16()); From 011480924a0e261f520803e7d609c4c52dece89b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Apr 2023 19:00:51 +0200 Subject: [PATCH 197/277] Use forward declaration of ThreadPool Signed-off-by: Azat Khuzhin --- src/Access/DiskAccessStorage.cpp | 12 ++-- src/Access/DiskAccessStorage.h | 4 +- src/Access/ReplicatedAccessStorage.cpp | 8 ++- src/Access/ReplicatedAccessStorage.h | 6 +- src/AggregateFunctions/IAggregateFunction.h | 2 +- src/Backups/BackupCoordinationFileInfos.cpp | 1 + src/Backups/BackupEntriesCollector.cpp | 1 + src/Backups/BackupFileInfo.cpp | 1 + src/Backups/BackupFileInfo.h | 3 +- src/Backups/BackupUtils.h | 1 - src/Backups/BackupsWorker.cpp | 19 +++--- src/Backups/BackupsWorker.h | 7 +- src/Bridge/IBridge.cpp | 1 + src/Common/SystemLogBase.cpp | 13 ++-- src/Common/SystemLogBase.h | 6 +- src/Common/ThreadPool.h | 1 + src/Common/ThreadPool_fwd.h | 13 ++++ src/Coordination/Changelog.h | 1 + src/Coordination/KeeperStorage.cpp | 1 + src/Core/BackgroundSchedulePool.cpp | 5 +- src/Core/BackgroundSchedulePool.h | 4 +- src/Databases/DatabaseFactory.h | 2 +- src/Databases/IDatabase.h | 2 +- src/Disks/IDisk.h | 16 +++-- src/Disks/IO/IOUringReader.cpp | 9 +-- src/Disks/IO/IOUringReader.h | 11 +++- src/Disks/IO/ThreadPoolReader.cpp | 10 ++- src/Disks/IO/ThreadPoolReader.h | 7 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 11 +++- src/Disks/IO/ThreadPoolRemoteFSReader.h | 6 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- src/Disks/VolumeJBOD.h | 1 + src/Functions/FunctionShowCertificate.h | 1 + src/IO/BackupsIOThreadPool.cpp | 1 + src/IO/BackupsIOThreadPool.h | 4 +- src/IO/IOThreadPool.cpp | 1 + src/IO/IOThreadPool.h | 4 +- src/IO/ParallelReadBuffer.h | 1 - src/IO/ReadBufferFromFileBase.cpp | 1 + src/IO/WriteBufferFromS3.h | 1 - src/Interpreters/Context.h | 2 +- src/Interpreters/CrashLog.h | 1 + src/Interpreters/DDLWorker.cpp | 12 ++-- src/Interpreters/DDLWorker.h | 6 +- src/Interpreters/DatabaseCatalog.cpp | 5 +- src/Interpreters/MetricLog.cpp | 6 +- src/Interpreters/MetricLog.h | 3 +- src/Interpreters/OpenTelemetrySpanLog.h | 1 + src/Interpreters/QueryThreadLog.h | 7 +- src/Server/MySQLHandler.cpp | 1 + .../MergeTree/BackgroundJobsAssignee.h | 1 - .../MergeTree/MergeTreeBackgroundExecutor.cpp | 66 +++++++++++++++++-- .../MergeTree/MergeTreeBackgroundExecutor.h | 49 ++------------ src/Storages/MergeTree/MergeTreeMarksLoader.h | 2 +- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 1 + .../MergeTree/MergeTreePrefetchedReadPool.h | 3 +- src/Storages/StorageDictionary.h | 1 + .../System/StorageSystemCertificates.cpp | 3 +- 58 files changed, 226 insertions(+), 145 deletions(-) create mode 100644 src/Common/ThreadPool_fwd.h diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index ef88e8a225f..710cf257b95 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include namespace DB @@ -317,15 +319,15 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type) return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily, /// without restarting that thread. - if (lists_writing_thread.joinable()) - lists_writing_thread.join(); + if (lists_writing_thread && lists_writing_thread->joinable()) + lists_writing_thread->join(); /// Create the 'need_rebuild_lists.mark' file. 
/// This file will be used later to find out if writing lists is successful or not. std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)}; out.close(); - lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this}; + lists_writing_thread = std::make_unique(&DiskAccessStorage::listsWritingThreadFunc, this); lists_writing_thread_is_waiting = true; } @@ -349,10 +351,10 @@ void DiskAccessStorage::listsWritingThreadFunc() void DiskAccessStorage::stopListsWritingThread() { - if (lists_writing_thread.joinable()) + if (lists_writing_thread && lists_writing_thread->joinable()) { lists_writing_thread_should_exit.notify_one(); - lists_writing_thread.join(); + lists_writing_thread->join(); } } diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index b1ef1d10ba7..069a966c8e9 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -81,7 +81,7 @@ private: bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false; /// List files are written in a separate thread. - ThreadFromGlobalPool lists_writing_thread; + std::unique_ptr lists_writing_thread; /// Signals `lists_writing_thread` to exit. std::condition_variable lists_writing_thread_should_exit; diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index ddc5e8bfed1..f34e6728ab3 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -72,7 +74,7 @@ void ReplicatedAccessStorage::startWatchingThread() { bool prev_watching_flag = watching.exchange(true); if (!prev_watching_flag) - watching_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWatchingThread, this); + watching_thread = std::make_unique(&ReplicatedAccessStorage::runWatchingThread, this); } void ReplicatedAccessStorage::stopWatchingThread() @@ -81,8 +83,8 @@ void ReplicatedAccessStorage::stopWatchingThread() if (prev_watching_flag) { watched_queue->finish(); - if (watching_thread.joinable()) - watching_thread.join(); + if (watching_thread && watching_thread->joinable()) + watching_thread->join(); } } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index d9d4b628f8d..555d58e6b04 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -2,7 +2,7 @@ #include -#include +#include #include #include #include @@ -21,7 +21,7 @@ public: static constexpr char STORAGE_TYPE[] = "replicated"; ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup); - virtual ~ReplicatedAccessStorage() override; + ~ReplicatedAccessStorage() override; const char * getStorageType() const override { return STORAGE_TYPE; } @@ -43,7 +43,7 @@ private: std::mutex cached_zookeeper_mutex; std::atomic watching = false; - ThreadFromGlobalPool watching_thread; + std::unique_ptr watching_thread; std::shared_ptr> watched_queue; std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override; diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 4a050a58600..ddc0535d0e4 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ 
b/src/AggregateFunctions/IAggregateFunction.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "config.h" diff --git a/src/Backups/BackupCoordinationFileInfos.cpp b/src/Backups/BackupCoordinationFileInfos.cpp index 44f00f6c543..eead742b510 100644 --- a/src/Backups/BackupCoordinationFileInfos.cpp +++ b/src/Backups/BackupCoordinationFileInfos.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 1adc4d41fee..ab836487ec0 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include namespace fs = std::filesystem; diff --git a/src/Backups/BackupFileInfo.cpp b/src/Backups/BackupFileInfo.cpp index 24548ca05fe..5a3076d1647 100644 --- a/src/Backups/BackupFileInfo.cpp +++ b/src/Backups/BackupFileInfo.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Backups/BackupFileInfo.h b/src/Backups/BackupFileInfo.h index 96df8ab2e0b..ae6ec83a37b 100644 --- a/src/Backups/BackupFileInfo.h +++ b/src/Backups/BackupFileInfo.h @@ -1,8 +1,9 @@ #pragma once #include -#include +#include +namespace Poco { class Logger; } namespace DB { diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index f451b003652..3dc0a58d304 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 58f0b3effc5..4b17174a8de 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -182,8 +183,8 @@ namespace BackupsWorker::BackupsWorker(size_t num_backup_threads, size_t num_restore_threads, bool allow_concurrent_backups_, bool allow_concurrent_restores_) - : backups_thread_pool(CurrentMetrics::BackupsThreads, CurrentMetrics::BackupsThreadsActive, num_backup_threads, /* max_free_threads = */ 0, num_backup_threads) - , restores_thread_pool(CurrentMetrics::RestoreThreads, CurrentMetrics::RestoreThreadsActive, num_restore_threads, /* max_free_threads = */ 0, num_restore_threads) + : backups_thread_pool(std::make_unique(CurrentMetrics::BackupsThreads, CurrentMetrics::BackupsThreadsActive, num_backup_threads, /* max_free_threads = */ 0, num_backup_threads)) + , restores_thread_pool(std::make_unique(CurrentMetrics::RestoreThreads, CurrentMetrics::RestoreThreadsActive, num_restore_threads, /* max_free_threads = */ 0, num_restore_threads)) , log(&Poco::Logger::get("BackupsWorker")) , allow_concurrent_backups(allow_concurrent_backups_) , allow_concurrent_restores(allow_concurrent_restores_) @@ -248,7 +249,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context if (backup_settings.async) { - backups_thread_pool.scheduleOrThrowOnError( + backups_thread_pool->scheduleOrThrowOnError( [this, backup_query, backup_id, backup_name_for_logging, backup_info, backup_settings, backup_coordination, context_in_use, mutable_context] { doBackup( @@ -435,7 +436,7 @@ void BackupsWorker::buildFileInfosForBackupEntries(const BackupPtr & backup, con LOG_TRACE(log, "{}", Stage::BUILDING_FILE_INFOS); backup_coordination->setStage(Stage::BUILDING_FILE_INFOS, ""); backup_coordination->waitForStage(Stage::BUILDING_FILE_INFOS); - backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, 
backup->getBaseBackup(), backups_thread_pool)); + backup_coordination->addFileInfos(::DB::buildFileInfosForBackupEntries(backup_entries, backup->getBaseBackup(), *backups_thread_pool)); } @@ -522,7 +523,7 @@ void BackupsWorker::writeBackupEntries(BackupMutablePtr backup, BackupEntries && } }; - if (always_single_threaded || !backups_thread_pool.trySchedule([job] { job(true); })) + if (always_single_threaded || !backups_thread_pool->trySchedule([job] { job(true); })) job(false); } @@ -581,7 +582,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt if (restore_settings.async) { - restores_thread_pool.scheduleOrThrowOnError( + restores_thread_pool->scheduleOrThrowOnError( [this, restore_query, restore_id, backup_name_for_logging, backup_info, restore_settings, restore_coordination, context_in_use] { doRestore( @@ -716,7 +717,7 @@ void BackupsWorker::doRestore( } /// Execute the data restoring tasks. - restoreTablesData(restore_id, backup, std::move(data_restore_tasks), restores_thread_pool); + restoreTablesData(restore_id, backup, std::move(data_restore_tasks), *restores_thread_pool); /// We have restored everything, we need to tell other hosts (they could be waiting for it). restore_coordination->setStage(Stage::COMPLETED, ""); @@ -941,8 +942,8 @@ void BackupsWorker::shutdown() if (has_active_backups_and_restores) LOG_INFO(log, "Waiting for {} backups and {} restores to be finished", num_active_backups, num_active_restores); - backups_thread_pool.wait(); - restores_thread_pool.wait(); + backups_thread_pool->wait(); + restores_thread_pool->wait(); if (has_active_backups_and_restores) LOG_INFO(log, "All backup and restore tasks have finished"); diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index d319daf42bd..cbfadc24b7b 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -1,7 +1,8 @@ #pragma once #include -#include +#include +#include #include #include #include @@ -132,8 +133,8 @@ private: void setNumFilesAndSize(const OperationID & id, size_t num_files, UInt64 total_size, size_t num_entries, UInt64 uncompressed_size, UInt64 compressed_size, size_t num_read_files, UInt64 num_read_bytes); - ThreadPool backups_thread_pool; - ThreadPool restores_thread_pool; + std::unique_ptr backups_thread_pool; + std::unique_ptr restores_thread_pool; std::unordered_map infos; std::condition_variable status_changed; diff --git a/src/Bridge/IBridge.cpp b/src/Bridge/IBridge.cpp index 1ea77573e5f..fce705dee5f 100644 --- a/src/Bridge/IBridge.cpp +++ b/src/Bridge/IBridge.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 13150194df2..86adcbbd31b 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -35,20 +36,18 @@ namespace constexpr size_t DBMS_SYSTEM_LOG_QUEUE_SIZE = 1048576; } +ISystemLog::~ISystemLog() = default; + void ISystemLog::stopFlushThread() { { std::lock_guard lock(mutex); - if (!saving_thread.joinable()) - { + if (!saving_thread || !saving_thread->joinable()) return; - } if (is_shutdown) - { return; - } is_shutdown = true; @@ -56,13 +55,13 @@ void ISystemLog::stopFlushThread() flush_event.notify_all(); } - saving_thread.join(); + saving_thread->join(); } void ISystemLog::startup() { std::lock_guard lock(mutex); - saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); }); + saving_thread = std::make_unique([this] { 
savingThreadFunction(); }); } static thread_local bool recursive_add_call = false; diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index c2cedb2ae39..8ac731c34f7 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #define SYSTEM_LOG_ELEMENTS(M) \ M(AsynchronousMetricLogElement) \ @@ -60,12 +60,12 @@ public: /// Stop the background flush thread before destructor. No more data will be written. virtual void shutdown() = 0; - virtual ~ISystemLog() = default; + virtual ~ISystemLog(); virtual void savingThreadFunction() = 0; protected: - ThreadFromGlobalPool saving_thread; + std::unique_ptr saving_thread; /// Data shared between callers of add()/flush()/shutdown(), and the saving thread std::mutex mutex; diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index b2f77f9693c..68023c8a410 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -17,6 +17,7 @@ #include #include #include +#include #include /** Very simple thread pool similar to boost::threadpool. diff --git a/src/Common/ThreadPool_fwd.h b/src/Common/ThreadPool_fwd.h new file mode 100644 index 00000000000..2782acc9c51 --- /dev/null +++ b/src/Common/ThreadPool_fwd.h @@ -0,0 +1,13 @@ +#pragma once + +template +class ThreadPoolImpl; + +template +class ThreadFromGlobalPoolImpl; + +using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; + +using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; + +using ThreadPool = ThreadPoolImpl; diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 288f71bb915..56b0475ba8b 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 41a6af54204..dc6c05e2594 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index 5384ee7f961..39724ec07fa 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -160,7 +161,7 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Met for (auto & thread : threads) thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { threadFunction(); }); - delayed_thread = ThreadFromGlobalPoolNoTracingContextPropagation([this] { delayExecutionThreadFunction(); }); + delayed_thread = std::make_unique([this] { delayExecutionThreadFunction(); }); } @@ -198,7 +199,7 @@ BackgroundSchedulePool::~BackgroundSchedulePool() delayed_tasks_cond_var.notify_all(); LOG_TRACE(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Waiting for threads to finish."); - delayed_thread.join(); + delayed_thread->join(); for (auto & thread : threads) thread.join(); diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index ef6fbfa68e9..e97b02e976f 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include @@ -86,7 +86,7 @@ private: std::condition_variable delayed_tasks_cond_var; std::mutex delayed_tasks_mutex; /// Thread waiting for next delayed task. 
- ThreadFromGlobalPoolNoTracingContextPropagation delayed_thread; + std::unique_ptr delayed_thread; /// Tasks ordered by scheduled time. DelayedTasks delayed_tasks; diff --git a/src/Databases/DatabaseFactory.h b/src/Databases/DatabaseFactory.h index 8992ea27093..cb631cd76d0 100644 --- a/src/Databases/DatabaseFactory.h +++ b/src/Databases/DatabaseFactory.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index b8880c4c4cc..53a2f372814 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 4e488bbb39a..797235b5fb8 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -254,8 +254,8 @@ public: virtual NameSet getCacheLayersNames() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Method `getCacheLayersNames()` is not implemented for disk: {}", - getDataSourceDescription().type); + "Method `getCacheLayersNames()` is not implemented for disk: {}", + toString(getDataSourceDescription().type)); } /// Returns a list of storage objects (contains path, size, ...). @@ -263,7 +263,9 @@ public: /// be multiple files in remote fs for single clickhouse file. virtual StoredObjects getStorageObjects(const String &) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getStorageObjects() not implemented for disk: {}`", getDataSourceDescription().type); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Method `getStorageObjects()` not implemented for disk: {}", + toString(getDataSourceDescription().type)); } /// For one local path there might be multiple remote paths in case of Log family engines. @@ -281,8 +283,8 @@ public: virtual void getRemotePathsRecursive(const String &, std::vector &) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Method `getRemotePathsRecursive() not implemented for disk: {}`", - getDataSourceDescription().type); + "Method `getRemotePathsRecursive() not implemented for disk: {}`", + toString(getDataSourceDescription().type)); } /// Batch request to remove multiple files. @@ -398,7 +400,7 @@ public: throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Method getObjectStorage() is not implemented for disk type: {}", - getDataSourceDescription().type); + toString(getDataSourceDescription().type)); } /// Create disk object storage according to disk type. 
@@ -409,7 +411,7 @@ public: throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Method createDiskObjectStorage() is not implemented for disk type: {}", - getDataSourceDescription().type); + toString(getDataSourceDescription().type)); } virtual bool supportsStat() const { return false; } diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 7bf1982d515..7b68e0ee2de 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -1,15 +1,16 @@ #include "IOUringReader.h" +#include #if USE_LIBURING #include #include -#include #include #include #include #include #include +#include #include #include @@ -44,7 +45,7 @@ namespace ErrorCodes } IOUringReader::IOUringReader(uint32_t entries_) - : log(&Poco::Logger::get("IOUringReader")) + : log(&Poco::Logger::get("IOUringReader")) { struct io_uring_probe * probe = io_uring_get_probe(); if (!probe) @@ -70,7 +71,7 @@ IOUringReader::IOUringReader(uint32_t entries_) throwFromErrno("Failed initializing io_uring", ErrorCodes::IO_URING_INIT_FAILED, -ret); cq_entries = params.cq_entries; - ring_completion_monitor = ThreadFromGlobalPool([this] { monitorRing(); }); + ring_completion_monitor = std::make_unique([this] { monitorRing(); }); } std::future IOUringReader::submit(Request request) @@ -333,7 +334,7 @@ IOUringReader::~IOUringReader() io_uring_submit(&ring); } - ring_completion_monitor.join(); + ring_completion_monitor->join(); io_uring_queue_exit(&ring); } diff --git a/src/Disks/IO/IOUringReader.h b/src/Disks/IO/IOUringReader.h index e3fcf116448..9b80ac6e5e0 100644 --- a/src/Disks/IO/IOUringReader.h +++ b/src/Disks/IO/IOUringReader.h @@ -4,15 +4,20 @@ #if USE_LIBURING -#include +#include +#include #include #include #include #include +namespace Poco { class Logger; } + namespace DB { +class Exception; + /** Perform reads using the io_uring Linux subsystem. 
* * The class sets up a single io_uring that clients submit read requests to, they are @@ -30,7 +35,7 @@ private: uint32_t cq_entries; std::atomic cancelled{false}; - ThreadFromGlobalPool ring_completion_monitor; + std::unique_ptr ring_completion_monitor; struct EnqueuedRequest { @@ -74,7 +79,7 @@ public: void wait() override {} - virtual ~IOUringReader() override; + ~IOUringReader() override; }; } diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index 3a071d13122..de57fa157da 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +88,7 @@ static bool hasBugInPreadV2() #endif ThreadPoolReader::ThreadPoolReader(size_t pool_size, size_t queue_size_) - : pool(CurrentMetrics::ThreadPoolFSReaderThreads, CurrentMetrics::ThreadPoolFSReaderThreadsActive, pool_size, pool_size, queue_size_) + : pool(std::make_unique(CurrentMetrics::ThreadPoolFSReaderThreads, CurrentMetrics::ThreadPoolFSReaderThreadsActive, pool_size, pool_size, queue_size_)) { } @@ -200,7 +201,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - auto schedule = threadPoolCallbackRunner(pool, "ThreadPoolRead"); + auto schedule = threadPoolCallbackRunner(*pool, "ThreadPoolRead"); return schedule([request, fd]() -> Result { @@ -244,4 +245,9 @@ std::future ThreadPoolReader::submit(Request reques }, request.priority); } +void ThreadPoolReader::wait() +{ + pool->wait(); +} + } diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index dc754e0a81c..4c55be29bf9 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -1,7 +1,8 @@ #pragma once +#include #include -#include +#include #include @@ -28,14 +29,14 @@ namespace DB class ThreadPoolReader final : public IAsynchronousReader { private: - ThreadPool pool; + std::unique_ptr pool; public: ThreadPoolReader(size_t pool_size, size_t queue_size_); std::future submit(Request request) override; - void wait() override { pool.wait(); } + void wait() override; /// pool automatically waits for all tasks in destructor. 
}; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 1980f57c876..4d0f39357ab 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -1,6 +1,7 @@ #include "ThreadPoolRemoteFSReader.h" #include "config.h" +#include #include #include #include @@ -14,6 +15,7 @@ #include #include +#include namespace ProfileEvents @@ -62,7 +64,7 @@ IAsynchronousReader::Result RemoteFSFileDescriptor::readInto(char * data, size_t ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) - : pool(CurrentMetrics::ThreadPoolRemoteFSReaderThreads, CurrentMetrics::ThreadPoolRemoteFSReaderThreadsActive, pool_size, pool_size, queue_size_) + : pool(std::make_unique(CurrentMetrics::ThreadPoolRemoteFSReaderThreads, CurrentMetrics::ThreadPoolRemoteFSReaderThreadsActive, pool_size, pool_size, queue_size_)) { } @@ -92,7 +94,12 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, result.size); return Result{ .size = result.size, .offset = result.offset, .execution_watch = std::move(watch) }; - }, pool, "VFSRead", request.priority); + }, *pool, "VFSRead", request.priority); +} + +void ThreadPoolRemoteFSReader::wait() +{ + pool->wait(); } } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index cd289150ba1..3a765993292 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace DB @@ -15,10 +15,10 @@ public: std::future submit(Request request) override; - void wait() override { pool.wait(); } + void wait() override; private: - ThreadPool pool; + std::unique_ptr pool; }; class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 2cfb4d43a43..2f27dc18e4b 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index 81da64c488d..ef6f215bf18 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/src/Functions/FunctionShowCertificate.h b/src/Functions/FunctionShowCertificate.h index 3c30d8138e5..5061a198614 100644 --- a/src/Functions/FunctionShowCertificate.h +++ b/src/Functions/FunctionShowCertificate.h @@ -15,6 +15,7 @@ #include #include #include +#include #if USE_SSL #include diff --git a/src/IO/BackupsIOThreadPool.cpp b/src/IO/BackupsIOThreadPool.cpp index 0829553945a..e135ef66ffb 100644 --- a/src/IO/BackupsIOThreadPool.cpp +++ b/src/IO/BackupsIOThreadPool.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace CurrentMetrics diff --git a/src/IO/BackupsIOThreadPool.h b/src/IO/BackupsIOThreadPool.h index f5aae5741a8..745bf267300 100644 --- a/src/IO/BackupsIOThreadPool.h +++ b/src/IO/BackupsIOThreadPool.h @@ -1,6 +1,8 @@ #pragma once -#include +#include +#include +#include namespace DB { diff --git a/src/IO/IOThreadPool.cpp b/src/IO/IOThreadPool.cpp index 98bb6ffe6a7..6765deff4d4 100644 --- a/src/IO/IOThreadPool.cpp +++ b/src/IO/IOThreadPool.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace CurrentMetrics diff --git a/src/IO/IOThreadPool.h b/src/IO/IOThreadPool.h index 
4fcf99b6048..cfe755ed45a 100644 --- a/src/IO/IOThreadPool.h +++ b/src/IO/IOThreadPool.h @@ -1,6 +1,8 @@ #pragma once -#include +#include +#include +#include namespace DB { diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index d6e9b7989ad..a7b78be442c 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB { diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index d94cf12294b..4181615bc52 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index e56d590c57a..7c3c439f9c3 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -10,7 +10,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d5ade8c02c7..73c16a35efd 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index f6061355562..78794574c82 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -3,6 +3,7 @@ #include #include #include +#include /// Call this function on crash. diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index c4529af2c51..7c021bd82e6 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -31,9 +31,11 @@ #include #include #include +#include #include #include #include +#include #include @@ -121,8 +123,8 @@ void DDLWorker::startup() { [[maybe_unused]] bool prev_stop_flag = stop_flag.exchange(false); chassert(prev_stop_flag); - main_thread = ThreadFromGlobalPool(&DDLWorker::runMainThread, this); - cleanup_thread = ThreadFromGlobalPool(&DDLWorker::runCleanupThread, this); + main_thread = std::make_unique(&DDLWorker::runMainThread, this); + cleanup_thread = std::make_unique(&DDLWorker::runCleanupThread, this); } void DDLWorker::shutdown() @@ -132,8 +134,10 @@ void DDLWorker::shutdown() { queue_updated_event->set(); cleanup_event->set(); - main_thread.join(); - cleanup_thread.join(); + if (main_thread) + main_thread->join(); + if (cleanup_thread) + cleanup_thread->join(); worker_pool.reset(); } } diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index 6cf034edae8..01ed89907a1 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -145,8 +145,8 @@ protected: std::atomic initialized = false; std::atomic stop_flag = true; - ThreadFromGlobalPool main_thread; - ThreadFromGlobalPool cleanup_thread; + std::unique_ptr main_thread; + std::unique_ptr cleanup_thread; /// Size of the pool for query execution. 
size_t pool_size = 1; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index f37e41614b0..8d3fa91a7fe 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -8,16 +8,17 @@ #include #include #include -#include #include #include #include #include #include +#include +#include #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/MetricLog.cpp b/src/Interpreters/MetricLog.cpp index 578cc118a6b..24f77f7d0ba 100644 --- a/src/Interpreters/MetricLog.cpp +++ b/src/Interpreters/MetricLog.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -58,7 +59,7 @@ void MetricLog::startCollectMetric(size_t collect_interval_milliseconds_) { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - metric_flush_thread = ThreadFromGlobalPool([this] { metricThreadFunction(); }); + metric_flush_thread = std::make_unique([this] { metricThreadFunction(); }); } @@ -67,7 +68,8 @@ void MetricLog::stopCollectMetric() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - metric_flush_thread.join(); + if (metric_flush_thread) + metric_flush_thread->join(); } diff --git a/src/Interpreters/MetricLog.h b/src/Interpreters/MetricLog.h index 44fcdced07c..aacdd4f49d2 100644 --- a/src/Interpreters/MetricLog.h +++ b/src/Interpreters/MetricLog.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -50,7 +51,7 @@ public: private: void metricThreadFunction(); - ThreadFromGlobalPool metric_flush_thread; + std::unique_ptr metric_flush_thread; size_t collect_interval_milliseconds; std::atomic is_shutdown_metric_thread{false}; }; diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 9305605f30b..7368b184e5e 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include diff --git a/src/Interpreters/QueryThreadLog.h b/src/Interpreters/QueryThreadLog.h index 6cdb3142ba3..684d7fce53e 100644 --- a/src/Interpreters/QueryThreadLog.h +++ b/src/Interpreters/QueryThreadLog.h @@ -2,16 +2,11 @@ #include #include +#include #include #include -namespace ProfileEvents -{ - class Counters; -} - - namespace DB { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 40cc51f8aae..96566a7d706 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include "config_version.h" diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.h b/src/Storages/MergeTree/BackgroundJobsAssignee.h index db93b5f710b..27e75a79b97 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.h +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 84fa9ec2c8e..65a796456bf 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -2,21 +2,77 @@ #include +#include #include #include #include #include +namespace CurrentMetrics +{ + extern const Metric MergeTreeBackgroundExecutorThreads; + extern const Metric MergeTreeBackgroundExecutorThreadsActive; +} + namespace DB { namespace ErrorCodes { extern const int ABORTED; + 
extern const int INVALID_CONFIG_PARAMETER; } +template +MergeTreeBackgroundExecutor::MergeTreeBackgroundExecutor( + String name_, + size_t threads_count_, + size_t max_tasks_count_, + CurrentMetrics::Metric metric_, + CurrentMetrics::Metric max_tasks_metric_) + : name(name_) + , threads_count(threads_count_) + , max_tasks_count(max_tasks_count_) + , metric(metric_) + , max_tasks_metric(max_tasks_metric_, 2 * max_tasks_count) // active + pending + , pool(std::make_unique(CurrentMetrics::MergeTreeBackgroundExecutorThreads, CurrentMetrics::MergeTreeBackgroundExecutorThreadsActive)) +{ + if (max_tasks_count == 0) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Task count for MergeTreeBackgroundExecutor must not be zero"); + + pending.setCapacity(max_tasks_count); + active.set_capacity(max_tasks_count); + + pool->setMaxThreads(std::max(1UL, threads_count)); + pool->setMaxFreeThreads(std::max(1UL, threads_count)); + pool->setQueueSize(std::max(1UL, threads_count)); + + for (size_t number = 0; number < threads_count; ++number) + pool->scheduleOrThrowOnError([this] { threadFunction(); }); +} + +template +MergeTreeBackgroundExecutor::MergeTreeBackgroundExecutor( + String name_, + size_t threads_count_, + size_t max_tasks_count_, + CurrentMetrics::Metric metric_, + CurrentMetrics::Metric max_tasks_metric_, + std::string_view policy) + requires requires(Queue queue) { queue.updatePolicy(policy); } // Because we use explicit template instantiation + : MergeTreeBackgroundExecutor(name_, threads_count_, max_tasks_count_, metric_, max_tasks_metric_) +{ + pending.updatePolicy(policy); +} + +template +MergeTreeBackgroundExecutor::~MergeTreeBackgroundExecutor() +{ + wait(); +} + template void MergeTreeBackgroundExecutor::wait() { @@ -26,7 +82,7 @@ void MergeTreeBackgroundExecutor::wait() has_tasks.notify_all(); } - pool.wait(); + pool->wait(); } template @@ -52,12 +108,12 @@ void MergeTreeBackgroundExecutor::increaseThreadsAndMaxTasksCount(size_t pending.setCapacity(new_max_tasks_count); active.set_capacity(new_max_tasks_count); - pool.setMaxThreads(std::max(1UL, new_threads_count)); - pool.setMaxFreeThreads(std::max(1UL, new_threads_count)); - pool.setQueueSize(std::max(1UL, new_threads_count)); + pool->setMaxThreads(std::max(1UL, new_threads_count)); + pool->setMaxFreeThreads(std::max(1UL, new_threads_count)); + pool->setQueueSize(std::max(1UL, new_threads_count)); for (size_t number = threads_count; number < new_threads_count; ++number) - pool.scheduleOrThrowOnError([this] { threadFunction(); }); + pool->scheduleOrThrowOnError([this] { threadFunction(); }); max_tasks_metric.changeTo(2 * new_max_tasks_count); // pending + active max_tasks_count.store(new_max_tasks_count, std::memory_order_relaxed); diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index a27fb18c0fe..1ed4fc2dabc 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -15,24 +15,14 @@ #include #include -#include +#include #include #include #include -namespace CurrentMetrics -{ - extern const Metric MergeTreeBackgroundExecutorThreads; - extern const Metric MergeTreeBackgroundExecutorThreadsActive; -} - namespace DB { -namespace ErrorCodes -{ - extern const int INVALID_CONFIG_PARAMETER; -} struct TaskRuntimeData; using TaskRuntimeDataPtr = std::shared_ptr; @@ -255,28 +245,7 @@ public: size_t threads_count_, size_t max_tasks_count_, CurrentMetrics::Metric metric_, - CurrentMetrics::Metric 
max_tasks_metric_) - : name(name_) - , threads_count(threads_count_) - , max_tasks_count(max_tasks_count_) - , metric(metric_) - , max_tasks_metric(max_tasks_metric_, 2 * max_tasks_count) // active + pending - , pool(CurrentMetrics::MergeTreeBackgroundExecutorThreads, CurrentMetrics::MergeTreeBackgroundExecutorThreadsActive) - { - if (max_tasks_count == 0) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Task count for MergeTreeBackgroundExecutor must not be zero"); - - pending.setCapacity(max_tasks_count); - active.set_capacity(max_tasks_count); - - pool.setMaxThreads(std::max(1UL, threads_count)); - pool.setMaxFreeThreads(std::max(1UL, threads_count)); - pool.setQueueSize(std::max(1UL, threads_count)); - - for (size_t number = 0; number < threads_count; ++number) - pool.scheduleOrThrowOnError([this] { threadFunction(); }); - } - + CurrentMetrics::Metric max_tasks_metric_); MergeTreeBackgroundExecutor( String name_, size_t threads_count_, @@ -284,16 +253,8 @@ public: CurrentMetrics::Metric metric_, CurrentMetrics::Metric max_tasks_metric_, std::string_view policy) - requires requires(Queue queue) { queue.updatePolicy(policy); } // Because we use explicit template instantiation - : MergeTreeBackgroundExecutor(name_, threads_count_, max_tasks_count_, metric_, max_tasks_metric_) - { - pending.updatePolicy(policy); - } - - ~MergeTreeBackgroundExecutor() - { - wait(); - } + requires requires(Queue queue) { queue.updatePolicy(policy); }; // Because we use explicit template instantiation + ~MergeTreeBackgroundExecutor(); /// Handler for hot-reloading /// Supports only increasing the number of threads and tasks, because @@ -335,7 +296,7 @@ private: mutable std::mutex mutex; std::condition_variable has_tasks TSA_GUARDED_BY(mutex); bool shutdown TSA_GUARDED_BY(mutex) = false; - ThreadPool pool; + std::unique_ptr pool; Poco::Logger * log = &Poco::Logger::get("MergeTreeBackgroundExecutor"); }; diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 0294cbbf3fc..17e52939d3f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index d76b8522f42..114039f7ff7 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h index 98cfe28c563..74d76f723a9 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.h @@ -1,10 +1,11 @@ #pragma once -#include +#include #include #include #include #include +#include #include namespace DB diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index b3442ec2f99..2cbcc58fa65 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -4,6 +4,7 @@ #include #include +#include namespace DB diff --git a/src/Storages/System/StorageSystemCertificates.cpp b/src/Storages/System/StorageSystemCertificates.cpp index c4d262f2f44..e1767e4c03a 100644 --- a/src/Storages/System/StorageSystemCertificates.cpp +++ b/src/Storages/System/StorageSystemCertificates.cpp @@ -6,7 +6,8 @@ #include 
#include
#include
-#include "Poco/File.h"
+#include
+#include

 #if USE_SSL
 #include
 #include "Poco/Net/SSLManager.h"

From bea9468285a8df083161ea7c3e4ced027fbd1f5b Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Fri, 7 Apr 2023 10:47:52 +0000
Subject: [PATCH 198/277] Fix 01548_parallel_parsing_max_memory.sh

---
 tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
index 8c4900043d0..308b90dcd55 100755
--- a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
+++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh
@@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv
 
-$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')"
+$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=50Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String') settings max_threads=1"

From 78038a3c2cec705b405947bc6c0f53e07f844666 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Fri, 7 Apr 2023 11:34:04 +0000
Subject: [PATCH 199/277] Fix: do not resize pipeline when there are no files
 to process (globs expand to an empty set)

---
 src/Storages/StorageFile.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index f8b5ea0ced7..665630c3559 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -792,12 +792,13 @@ Pipe StorageFile::read(
     }
 
     Pipe pipe = Pipe::unitePipes(std::move(pipes));
-    /// parallelize output as much as possible
-    if (num_streams < max_num_streams)
+    /// Parallelize output as much as possible.
+    /// Note: the number of streams can be 0 if `paths` is empty.
+    /// This happens when the globs in file(path, ...) expand to an empty set, i.e. there are no files to process.
+    if (num_streams > 0 && num_streams < max_num_streams)
     {
         pipe.addTransform(std::make_shared<ResizeProcessor>(pipe.getHeader(), num_streams, max_num_streams));
     }
-
     return pipe;
 }

From 5b2b20a0b06e88ea1165cb59a6ff8f65653cd71f Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Fri, 7 Apr 2023 15:13:21 +0200
Subject: [PATCH 200/277] Rename ThreadGroupStatus to ThreadGroup

There are methods like getThreadGroup() and the ThreadGroupSwitcher class,
so the new name seems more logical.
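
For illustration only (a hypothetical call site, not part of this patch), the
rename is purely mechanical and the pointer alias keeps callers source-compatible:

    // before: using ThreadGroupStatusPtr = std::shared_ptr<ThreadGroupStatus>;
    // after:  using ThreadGroupPtr       = std::shared_ptr<ThreadGroup>;
    ThreadGroupPtr group = CurrentThread::getGroup();  // same object, shorter name
    ThreadGroupSwitcher switcher(group);               // now matches the class naming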
Signed-off-by: Azat Khuzhin --- src/Common/CurrentThread.cpp | 2 +- src/Common/CurrentThread.h | 6 ++-- src/Common/ThreadStatus.cpp | 6 ++-- src/Common/ThreadStatus.h | 24 +++++++-------- src/Interpreters/Aggregator.cpp | 4 +-- src/Interpreters/ExternalLoader.cpp | 2 +- src/Interpreters/ProcessList.cpp | 2 +- src/Interpreters/ProcessList.h | 4 +-- src/Interpreters/ThreadStatusExt.cpp | 30 +++++++++---------- .../Executors/CompletedPipelineExecutor.cpp | 2 +- .../PullingAsyncPipelineExecutor.cpp | 2 +- .../PushingAsyncPipelineExecutor.cpp | 2 +- .../Impl/ParallelFormattingOutputFormat.cpp | 4 +-- .../Impl/ParallelFormattingOutputFormat.h | 4 +-- .../Impl/ParallelParsingInputFormat.cpp | 4 +-- .../Formats/Impl/ParallelParsingInputFormat.h | 4 +-- .../Transforms/buildPushingToViewsChain.cpp | 6 ++-- src/Storages/MergeTree/MergeList.cpp | 4 +-- src/Storages/MergeTree/MergeList.h | 6 ++-- 19 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 6ec46d6508c..fd2ad0bbaf1 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -90,7 +90,7 @@ void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr & logs_queue, @@ -69,9 +69,9 @@ public: /// You must call one of these methods when create a query child thread: /// Add current thread to a group associated with the thread group - static void attachToGroup(const ThreadGroupStatusPtr & thread_group); + static void attachToGroup(const ThreadGroupPtr & thread_group); /// Is useful for a ThreadPool tasks - static void attachToGroupIfDetached(const ThreadGroupStatusPtr & thread_group); + static void attachToGroupIfDetached(const ThreadGroupPtr & thread_group); /// Non-master threads call this method in destructor automatically static void detachFromGroupIfNotDetached(); diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 1b783aa9ec4..e00c9b168a9 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -61,7 +61,7 @@ static thread_local ThreadStack alt_stack; static thread_local bool has_alt_stack = false; #endif -ThreadGroupStatus::ThreadGroupStatus() +ThreadGroup::ThreadGroup() : master_thread_id(CurrentThread::get().thread_id) {} @@ -119,7 +119,7 @@ ThreadStatus::ThreadStatus() #endif } -ThreadGroupStatusPtr ThreadStatus::getThreadGroup() const +ThreadGroupPtr ThreadStatus::getThreadGroup() const { return thread_group; } @@ -139,7 +139,7 @@ ContextPtr ThreadStatus::getGlobalContext() const return global_context.lock(); } -void ThreadGroupStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue, LogsLevel logs_level) +void ThreadGroup::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue, LogsLevel logs_level) { std::lock_guard lock(mutex); shared_data.logs_queue_ptr = logs_queue; diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index cb85aa67b11..16083fe0925 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -58,15 +58,15 @@ using ThreadStatusPtr = ThreadStatus *; * Create via CurrentThread::initializeQuery (for queries) or directly (for various background tasks). * Use via CurrentThread::getGroup. 
*/ -class ThreadGroupStatus; -using ThreadGroupStatusPtr = std::shared_ptr; +class ThreadGroup; +using ThreadGroupPtr = std::shared_ptr; -class ThreadGroupStatus +class ThreadGroup { public: - ThreadGroupStatus(); + ThreadGroup(); using FatalErrorCallback = std::function; - ThreadGroupStatus(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_ = {}); + ThreadGroup(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_ = {}); /// The first thread created this thread group const UInt64 master_thread_id; @@ -104,9 +104,9 @@ public: void attachInternalProfileEventsQueue(const InternalProfileEventsQueuePtr & profile_queue); /// When new query starts, new thread group is created for it, current thread becomes master thread of the query - static ThreadGroupStatusPtr createForQuery(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_ = {}); + static ThreadGroupPtr createForQuery(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_ = {}); - static ThreadGroupStatusPtr createForBackgroundProcess(ContextPtr storage_context); + static ThreadGroupPtr createForBackgroundProcess(ContextPtr storage_context); std::vector getInvolvedThreadIds() const; void linkThread(UInt64 thread_it); @@ -163,7 +163,7 @@ public: private: /// Group of threads, to which this thread attached - ThreadGroupStatusPtr thread_group; + ThreadGroupPtr thread_group; /// Is set once ContextWeakPtr global_context; @@ -174,7 +174,7 @@ private: using FatalErrorCallback = std::function; FatalErrorCallback fatal_error_callback; - ThreadGroupStatus::SharedData local_data; + ThreadGroup::SharedData local_data; bool performance_counters_finalized = false; @@ -215,7 +215,7 @@ public: ThreadStatus(); ~ThreadStatus(); - ThreadGroupStatusPtr getThreadGroup() const; + ThreadGroupPtr getThreadGroup() const; const String & getQueryId() const; @@ -239,7 +239,7 @@ public: void setInternalThread(); /// Attaches slave thread to existing thread group - void attachToGroup(const ThreadGroupStatusPtr & thread_group_, bool check_detached = true); + void attachToGroup(const ThreadGroupPtr & thread_group_, bool check_detached = true); /// Detaches thread from the thread group and the query, dumps performance counters if they have not been dumped void detachFromGroup(); @@ -287,7 +287,7 @@ private: void logToQueryThreadLog(QueryThreadLog & thread_log, const String & current_database); - void attachToGroupImpl(const ThreadGroupStatusPtr & thread_group_); + void attachToGroupImpl(const ThreadGroupPtr & thread_group_); }; /** diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index d6fbf072d05..2d5de796e1c 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2315,7 +2315,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( std::atomic next_bucket_to_merge = 0; - auto converter = [&](size_t thread_id, ThreadGroupStatusPtr thread_group) + auto converter = [&](size_t thread_id, ThreadGroupPtr thread_group) { SCOPE_EXIT_SAFE( if (thread_group) @@ -3043,7 +3043,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari LOG_TRACE(log, "Merging partially aggregated two-level data."); - auto merge_bucket = [&bucket_to_blocks, &result, this](Int32 bucket, Arena * aggregates_pool, ThreadGroupStatusPtr thread_group) + auto merge_bucket = [&bucket_to_blocks, &result, this](Int32 bucket, Arena * aggregates_pool, ThreadGroupPtr thread_group) { SCOPE_EXIT_SAFE( if (thread_group) diff --git 
a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 04a116ec0c7..c1cbd8b75be 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -967,7 +967,7 @@ private: } /// Does the loading, possibly in the separate thread. - void doLoading(const String & name, size_t loading_id, bool forced_to_reload, size_t min_id_to_finish_loading_dependencies_, bool async, ThreadGroupStatusPtr thread_group = {}) + void doLoading(const String & name, size_t loading_id, bool forced_to_reload, size_t min_id_to_finish_loading_dependencies_, bool async, ThreadGroupPtr thread_group = {}) { SCOPE_EXIT_SAFE( if (thread_group) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 51053bd2884..aca474bf152 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -340,7 +340,7 @@ QueryStatus::QueryStatus( const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, - ThreadGroupStatusPtr && thread_group_, + ThreadGroupPtr && thread_group_, IAST::QueryKind query_kind_, UInt64 watch_start_nanoseconds) : WithContext(context_) diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index a04beac4901..b593bcef395 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -86,7 +86,7 @@ protected: ClientInfo client_info; /// Info about all threads involved in query execution - ThreadGroupStatusPtr thread_group; + ThreadGroupPtr thread_group; Stopwatch watch; @@ -162,7 +162,7 @@ public: const String & query_, const ClientInfo & client_info_, QueryPriorities::Handle && priority_handle_, - ThreadGroupStatusPtr && thread_group_, + ThreadGroupPtr && thread_group_, IAST::QueryKind query_kind_, UInt64 watch_start_nanoseconds); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 72ce08bf653..070cd3f98e1 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -41,14 +41,14 @@ namespace ErrorCodes extern const int CANNOT_SET_THREAD_PRIORITY; } -ThreadGroupStatus::ThreadGroupStatus(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_) +ThreadGroup::ThreadGroup(ContextPtr query_context_, FatalErrorCallback fatal_error_callback_) : master_thread_id(CurrentThread::get().thread_id) , query_context(query_context_) , global_context(query_context_->getGlobalContext()) , fatal_error_callback(fatal_error_callback_) {} -std::vector ThreadGroupStatus::getInvolvedThreadIds() const +std::vector ThreadGroup::getInvolvedThreadIds() const { std::vector res; @@ -60,22 +60,22 @@ std::vector ThreadGroupStatus::getInvolvedThreadIds() const return res; } -void ThreadGroupStatus::linkThread(UInt64 thread_it) +void ThreadGroup::linkThread(UInt64 thread_it) { std::lock_guard lock(mutex); thread_ids.insert(thread_it); } -ThreadGroupStatusPtr ThreadGroupStatus::createForQuery(ContextPtr query_context_, std::function fatal_error_callback_) +ThreadGroupPtr ThreadGroup::createForQuery(ContextPtr query_context_, std::function fatal_error_callback_) { - auto group = std::make_shared(query_context_, std::move(fatal_error_callback_)); + auto group = std::make_shared(query_context_, std::move(fatal_error_callback_)); group->memory_tracker.setDescription("(for query)"); return group; } -ThreadGroupStatusPtr ThreadGroupStatus::createForBackgroundProcess(ContextPtr storage_context) +ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_context) 
{ - auto group = std::make_shared(storage_context); + auto group = std::make_shared(storage_context); group->memory_tracker.setDescription("background process to apply mutate/merge in table"); /// However settings from storage context have to be applied @@ -89,7 +89,7 @@ ThreadGroupStatusPtr ThreadGroupStatus::createForBackgroundProcess(ContextPtr st return group; } -void ThreadGroupStatus::attachQueryForLog(const String & query_, UInt64 normalized_hash) +void ThreadGroup::attachQueryForLog(const String & query_, UInt64 normalized_hash) { auto hash = normalized_hash ? normalized_hash : normalizedQueryHash(query_); @@ -109,7 +109,7 @@ void ThreadStatus::attachQueryForLog(const String & query_) thread_group->attachQueryForLog(local_data.query_for_logs, local_data.normalized_query_hash); } -void ThreadGroupStatus::attachInternalProfileEventsQueue(const InternalProfileEventsQueuePtr & profile_queue) +void ThreadGroup::attachInternalProfileEventsQueue(const InternalProfileEventsQueuePtr & profile_queue) { std::lock_guard lock(mutex); shared_data.profile_queue_ptr = profile_queue; @@ -168,7 +168,7 @@ void ThreadStatus::applyQuerySettings() #endif } -void ThreadStatus::attachToGroupImpl(const ThreadGroupStatusPtr & thread_group_) +void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; @@ -234,7 +234,7 @@ void ThreadStatus::setInternalThread() internal_thread = true; } -void ThreadStatus::attachToGroup(const ThreadGroupStatusPtr & thread_group_, bool check_detached) +void ThreadStatus::attachToGroup(const ThreadGroupPtr & thread_group_, bool check_detached) { if (thread_group && check_detached) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't attach query to the thread, it is already attached"); @@ -541,14 +541,14 @@ void ThreadStatus::logToQueryViewsLog(const ViewRuntimeData & vinfo) views_log->add(element); } -void CurrentThread::attachToGroup(const ThreadGroupStatusPtr & thread_group) +void CurrentThread::attachToGroup(const ThreadGroupPtr & thread_group) { if (unlikely(!current_thread)) return; current_thread->attachToGroup(thread_group, true); } -void CurrentThread::attachToGroupIfDetached(const ThreadGroupStatusPtr & thread_group) +void CurrentThread::attachToGroupIfDetached(const ThreadGroupPtr & thread_group) { if (unlikely(!current_thread)) return; @@ -574,7 +574,7 @@ CurrentThread::QueryScope::QueryScope(ContextMutablePtr query_context, std::func if (!query_context->hasQueryContext()) query_context->makeQueryContext(); - auto group = ThreadGroupStatus::createForQuery(query_context, std::move(fatal_error_callback)); + auto group = ThreadGroup::createForQuery(query_context, std::move(fatal_error_callback)); CurrentThread::attachToGroup(group); } @@ -584,7 +584,7 @@ CurrentThread::QueryScope::QueryScope(ContextPtr query_context, std::functiongetPort(IOutputFormat::PortKind::Main).getHeader(); } -static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) { SCOPE_EXIT_SAFE( if (thread_group) diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 3aec7608e6d..ac40cef35d9 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ 
b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -97,7 +97,7 @@ struct PushingAsyncPipelineExecutor::Data } }; -static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) +static void threadFunction(PushingAsyncPipelineExecutor::Data & data, ThreadGroupPtr thread_group, size_t num_threads) { SCOPE_EXIT_SAFE( if (thread_group) diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 3fc57ca1c1e..62ee4e4a48d 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -96,7 +96,7 @@ namespace DB } - void ParallelFormattingOutputFormat::collectorThreadFunction(const ThreadGroupStatusPtr & thread_group) + void ParallelFormattingOutputFormat::collectorThreadFunction(const ThreadGroupPtr & thread_group) { SCOPE_EXIT_SAFE( if (thread_group) @@ -157,7 +157,7 @@ namespace DB } - void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number, size_t first_row_num, const ThreadGroupStatusPtr & thread_group) + void ParallelFormattingOutputFormat::formatterThreadFunction(size_t current_unit_number, size_t first_row_num, const ThreadGroupPtr & thread_group) { SCOPE_EXIT_SAFE( if (thread_group) diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h index 790d05e83dd..4e5aaab5dcb 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.h @@ -270,10 +270,10 @@ private: } /// Collects all temporary buffers into main WriteBuffer. - void collectorThreadFunction(const ThreadGroupStatusPtr & thread_group); + void collectorThreadFunction(const ThreadGroupPtr & thread_group); /// This function is executed in ThreadPool and the only purpose of it is to format one Chunk into a continuous buffer in memory. 
- void formatterThreadFunction(size_t current_unit_number, size_t first_row_num, const ThreadGroupStatusPtr & thread_group); + void formatterThreadFunction(size_t current_unit_number, size_t first_row_num, const ThreadGroupPtr & thread_group); void setRowsBeforeLimit(size_t rows_before_limit) override { diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index a2e5074efb1..f4d619a263b 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -8,7 +8,7 @@ namespace DB { -void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr thread_group) +void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupPtr thread_group) { SCOPE_EXIT_SAFE( if (thread_group) @@ -62,7 +62,7 @@ void ParallelParsingInputFormat::segmentatorThreadFunction(ThreadGroupStatusPtr } } -void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number) +void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupPtr thread_group, size_t current_ticket_number) { SCOPE_EXIT_SAFE( if (thread_group) diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index 97df9308dbf..ae9f123d411 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -317,8 +317,8 @@ private: } } - void segmentatorThreadFunction(ThreadGroupStatusPtr thread_group); - void parserThreadFunction(ThreadGroupStatusPtr thread_group, size_t current_ticket_number); + void segmentatorThreadFunction(ThreadGroupPtr thread_group); + void parserThreadFunction(ThreadGroupPtr thread_group, size_t current_ticket_number); /// Save/log a background exception, set termination flag, wake up all /// threads. This function is used by segmentator and parsed threads. 
diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 91845bc18ad..e32e2116f71 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -270,14 +270,14 @@ Chain buildPushingToViewsChain( ASTPtr query; Chain out; - /// NOTE: ThreadGroupStatus always should have context attached, + /// NOTE: ThreadGroup always should have context attached, /// otherwise entry to the system.query_views_log will not be added /// (see ThreadStatus::logToQueryViewsLog()) - ThreadGroupStatusPtr running_group; + ThreadGroupPtr running_group; if (current_thread) running_group = current_thread->getThreadGroup(); if (!running_group) - running_group = std::make_shared(context); + running_group = std::make_shared(context); /// We are creating a ThreadStatus per view to store its metrics individually /// Since calling ThreadStatus() changes current_thread we save it and restore it after the calls diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 7970efd1a20..91e1cb1b078 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -11,7 +11,7 @@ namespace DB { -ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupStatusPtr thread_group) +ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupPtr thread_group) { chassert(thread_group); @@ -59,7 +59,7 @@ MergeListElement::MergeListElement( is_mutation = (result_part_info.getDataVersion() != source_data_version); } - thread_group = ThreadGroupStatus::createForBackgroundProcess(context); + thread_group = ThreadGroup::createForBackgroundProcess(context); } MergeInfo MergeListElement::getInfo() const diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index c3ba4f00f7f..308f00feda9 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -69,11 +69,11 @@ struct Settings; class ThreadGroupSwitcher : private boost::noncopyable { public: - explicit ThreadGroupSwitcher(ThreadGroupStatusPtr thread_group); + explicit ThreadGroupSwitcher(ThreadGroupPtr thread_group); ~ThreadGroupSwitcher(); private: - ThreadGroupStatusPtr prev_thread_group; + ThreadGroupPtr prev_thread_group; }; struct MergeListElement : boost::noncopyable @@ -113,7 +113,7 @@ struct MergeListElement : boost::noncopyable /// Detected after merge already started std::atomic merge_algorithm; - ThreadGroupStatusPtr thread_group; + ThreadGroupPtr thread_group; MergeListElement( const StorageID & table_id_, From aacf2a083882bf395d94a6f7dba1fa2d35db2991 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Apr 2023 15:15:10 +0200 Subject: [PATCH 201/277] Move ThreadGroupSwitcher to ThreadStatus.h (out from MergeTree code) Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.h | 16 +++++++++++++++- src/Interpreters/ThreadStatusExt.cpp | 18 ++++++++++++++++++ src/Storages/MergeTree/MergeList.cpp | 18 ------------------ src/Storages/MergeTree/MergeList.h | 14 -------------- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 16083fe0925..600dfc56d2b 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -41,7 +41,6 @@ class TaskStatsInfoGetter; class InternalTextLogsQueue; struct ViewRuntimeData; class QueryViewsLog; -class ThreadGroupSwitcher; using InternalTextLogsQueuePtr = std::shared_ptr; using InternalTextLogsQueueWeakPtr = 
std::weak_ptr; @@ -120,6 +119,21 @@ private: std::unordered_set thread_ids; }; +/** + * Since merge is executed with multiple threads, this class + * switches the parent MemoryTracker as part of the thread group to account all the memory used. + */ +class ThreadGroupSwitcher : private boost::noncopyable +{ +public: + explicit ThreadGroupSwitcher(ThreadGroupPtr thread_group); + ~ThreadGroupSwitcher(); + +private: + ThreadGroupPtr prev_thread_group; +}; + + /** * We use **constinit** here to tell the compiler the current_thread variable is initialized. * If we didn't help the compiler, then it would most likely add a check before every use of the variable to initialize it if needed. diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 070cd3f98e1..c6e36263e7c 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -115,6 +115,24 @@ void ThreadGroup::attachInternalProfileEventsQueue(const InternalProfileEventsQu shared_data.profile_queue_ptr = profile_queue; } +ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupPtr thread_group) +{ + chassert(thread_group); + + /// might be nullptr + prev_thread_group = CurrentThread::getGroup(); + + CurrentThread::detachFromGroupIfNotDetached(); + CurrentThread::attachToGroup(thread_group); +} + +ThreadGroupSwitcher::~ThreadGroupSwitcher() +{ + CurrentThread::detachFromGroupIfNotDetached(); + if (prev_thread_group) + CurrentThread::attachToGroup(prev_thread_group); +} + void ThreadStatus::attachInternalProfileEventsQueue(const InternalProfileEventsQueuePtr & profile_queue) { if (!thread_group) diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 91e1cb1b078..0bf662921ad 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -11,24 +11,6 @@ namespace DB { -ThreadGroupSwitcher::ThreadGroupSwitcher(ThreadGroupPtr thread_group) -{ - chassert(thread_group); - - /// might be nullptr - prev_thread_group = CurrentThread::getGroup(); - - CurrentThread::detachFromGroupIfNotDetached(); - CurrentThread::attachToGroup(thread_group); -} - -ThreadGroupSwitcher::~ThreadGroupSwitcher() -{ - CurrentThread::detachFromGroupIfNotDetached(); - if (prev_thread_group) - CurrentThread::attachToGroup(prev_thread_group); -} - MergeListElement::MergeListElement( const StorageID & table_id_, FutureMergedMutatedPartPtr future_part, diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h index 308f00feda9..9c8c2ebd1e4 100644 --- a/src/Storages/MergeTree/MergeList.h +++ b/src/Storages/MergeTree/MergeList.h @@ -62,20 +62,6 @@ using MergeListEntry = BackgroundProcessListEntry; struct Settings; -/** - * Since merge is executed with multiple threads, this class - * switches the parent MemoryTracker as part of the thread group to account all the memory used. - */ -class ThreadGroupSwitcher : private boost::noncopyable -{ -public: - explicit ThreadGroupSwitcher(ThreadGroupPtr thread_group); - ~ThreadGroupSwitcher(); - -private: - ThreadGroupPtr prev_thread_group; -}; - struct MergeListElement : boost::noncopyable { const StorageID table_id; From 44ae8485f185ba43323d61a2e85007f5067b4e2b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Apr 2023 15:29:51 +0200 Subject: [PATCH 202/277] Use one ThreadGroup while pushing to materialized views Before this patch only the case when ThreadStatus (current_thread) already has ThreadGroup works that way, after they will be identical in this aspect. 
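
A simplified sketch of the pattern both paths now share (taken from the
InterpreterInsertQuery changes below; error handling omitted):

    ThreadGroupPtr running_group;
    if (current_thread)
        running_group = current_thread->getThreadGroup();
    if (!running_group)
        running_group = std::make_shared<ThreadGroup>(context);  // e.g. Buffer background flush has no group
    // the same group is then passed down through buildPushingToViewsChain(),
    // so every view logs to system.query_views_log under one ThreadGroup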
This should not affect anything, but it makes the behavior more consistent.

Signed-off-by: Azat Khuzhin
---
 src/Interpreters/InterpreterInsertQuery.cpp   | 24 ++++++++++++++++---
 src/Interpreters/InterpreterInsertQuery.h     |  1 +
 .../Transforms/buildPushingToViewsChain.cpp   | 22 ++++++++---------
 .../Transforms/buildPushingToViewsChain.h     |  4 ++++
 4 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp
index b4a19ea7403..e78a61831a1 100644
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@@ -32,6 +32,7 @@
 #include
 #include
 #include
+#include
 #include
 
@@ -233,8 +234,14 @@ Chain InterpreterInsertQuery::buildChain(
     ThreadStatusesHolderPtr thread_status_holder,
     std::atomic_uint64_t * elapsed_counter_ms)
 {
+    ThreadGroupPtr running_group;
+    if (current_thread)
+        running_group = current_thread->getThreadGroup();
+    if (!running_group)
+        running_group = std::make_shared<ThreadGroup>(getContext());
+
     auto sample = getSampleBlock(columns, table, metadata_snapshot);
-    return buildChainImpl(table, metadata_snapshot, sample, thread_status_holder, elapsed_counter_ms);
+    return buildChainImpl(table, metadata_snapshot, sample, thread_status_holder, running_group, elapsed_counter_ms);
 }
 
 Chain InterpreterInsertQuery::buildChainImpl(
@@ -242,6 +249,7 @@ Chain InterpreterInsertQuery::buildChainImpl(
     const StorageMetadataPtr & metadata_snapshot,
     const Block & query_sample_block,
     ThreadStatusesHolderPtr thread_status_holder,
+    ThreadGroupPtr running_group,
     std::atomic_uint64_t * elapsed_counter_ms)
 {
     ThreadStatus * thread_status = current_thread;
@@ -273,7 +281,9 @@ Chain InterpreterInsertQuery::buildChainImpl(
     }
     else
     {
-        out = buildPushingToViewsChain(table, metadata_snapshot, context_ptr, query_ptr, no_destination, thread_status_holder, elapsed_counter_ms);
+        out = buildPushingToViewsChain(table, metadata_snapshot, context_ptr,
+            query_ptr, no_destination,
+            thread_status_holder, running_group, elapsed_counter_ms);
     }
 
     /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.
@@ -461,9 +471,17 @@ BlockIO InterpreterInsertQuery::execute() pipeline = interpreter_watch.buildQueryPipeline(); } + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); for (size_t i = 0; i < out_streams_size; ++i) { - auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, nullptr, nullptr); + auto out = buildChainImpl(table, metadata_snapshot, query_sample_block, + /* thread_status_holder= */ nullptr, + running_group, + /* elapsed_counter_ms= */ nullptr); out_chains.emplace_back(std::move(out)); } } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bb6509a9102..f60d6567d74 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -70,6 +70,7 @@ private: const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block, ThreadStatusesHolderPtr thread_status_holder, + ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms); }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index e32e2116f71..0bdd7a88851 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -195,6 +195,7 @@ Chain buildPushingToViewsChain( const ASTPtr & query_ptr, bool no_destination, ThreadStatusesHolderPtr thread_status_holder, + ThreadGroupPtr running_group, std::atomic_uint64_t * elapsed_counter_ms, const Block & live_view_header) { @@ -270,15 +271,6 @@ Chain buildPushingToViewsChain( ASTPtr query; Chain out; - /// NOTE: ThreadGroup always should have context attached, - /// otherwise entry to the system.query_views_log will not be added - /// (see ThreadStatus::logToQueryViewsLog()) - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(context); - /// We are creating a ThreadStatus per view to store its metrics individually /// Since calling ThreadStatus() changes current_thread we save it and restore it after the calls /// Later on, before doing any task related to a view, we'll switch to its ThreadStatus, do the work, @@ -354,18 +346,24 @@ Chain buildPushingToViewsChain( runtime_stats->type = QueryViewsLogElement::ViewType::LIVE; query = live_view->getInnerQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - view, view_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms, storage_header); + view, view_metadata_snapshot, insert_context, ASTPtr(), + /* no_destination= */ true, + thread_status_holder, running_group, view_counter_ms, storage_header); } else if (auto * window_view = dynamic_cast(view.get())) { runtime_stats->type = QueryViewsLogElement::ViewType::WINDOW; query = window_view->getMergeableQuery(); // Used only to log in system.query_views_log out = buildPushingToViewsChain( - view, view_metadata_snapshot, insert_context, ASTPtr(), true, thread_status_holder, view_counter_ms); + view, view_metadata_snapshot, insert_context, ASTPtr(), + /* no_destination= */ true, + thread_status_holder, running_group, view_counter_ms); } else out = buildPushingToViewsChain( - view, view_metadata_snapshot, insert_context, ASTPtr(), false, thread_status_holder, view_counter_ms); + view, view_metadata_snapshot, insert_context, 
ASTPtr(), + /* no_destination= */ false, + thread_status_holder, running_group, view_counter_ms); views_data->views.emplace_back(ViewRuntimeData{ std::move(query), diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 76131a8df6e..a2e7e39ff23 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -61,6 +61,10 @@ Chain buildPushingToViewsChain( /// We could specify separate thread_status for each view. /// Needed mainly to collect counters separately. Should be improved. ThreadStatusesHolderPtr thread_status_holder, + /// Usually current_thread->getThreadGroup(), but sometimes ThreadStatus + /// may not have ThreadGroup (i.e. Buffer background flush), and in this + /// case it should be passed outside. + ThreadGroupPtr running_group, /// Counter to measure time spent separately per view. Should be improved. std::atomic_uint64_t * elapsed_counter_ms, /// LiveView executes query itself, it needs source block structure. From 396f6a6421b49f775119663718d1fdc98953e48b Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Fri, 7 Apr 2023 17:07:36 +0200 Subject: [PATCH 203/277] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 600168d2637..1f081523ed9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8877,7 +8877,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP if (zookeeper->tryGet(lost_part_count_path, lost_part_count_str, &lost_part_count_stat)) { UInt64 lost_part_count = lost_part_count_str.empty() ? 
0 : parse(lost_part_count_str); - ops.emplace_back(zkutil::makeSetRequest(lost_part_count_path, fmt::format("{}", lost_part_count + 1), lost_part_count_stat.version)); + ops.emplace_back(zkutil::makeSetRequest(lost_part_count_path, toString(lost_part_count + 1), lost_part_count_stat.version)); } else { From d0a37f8798a2c87b37e31a4fa4f1b656b11d5982 Mon Sep 17 00:00:00 2001 From: lzydmxy <13126752315@163.com> Date: Sat, 8 Apr 2023 01:44:43 +0800 Subject: [PATCH 204/277] fix test test_multiple_disks --- tests/integration/test_multiple_disks/test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 221d830f62e..f6e5d456dfb 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -90,6 +90,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "small_jbod_with_external", @@ -100,6 +102,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "small_jbod_with_external_no_merges", @@ -110,6 +114,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "small_jbod_with_external_no_merges", @@ -120,6 +126,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 1, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "one_more_small_jbod_with_external", @@ -130,6 +138,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "one_more_small_jbod_with_external", @@ -140,6 +150,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "jbods_with_external", @@ -150,6 +162,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "10485760", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "jbods_with_external", @@ -160,6 +174,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "moving_jbod_with_external", @@ -170,6 +186,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.7, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "moving_jbod_with_external", @@ -180,6 +198,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.7, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "default_disk_with_external", @@ -190,6 +210,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "2097152", "move_factor": 0.1, "prefer_not_to_merge": 0, + 
"perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "default_disk_with_external", @@ -200,6 +222,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "20971520", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "special_warning_policy", @@ -210,6 +234,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "special_warning_policy", @@ -220,6 +246,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "0", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "special_warning_policy", @@ -230,6 +258,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "1024", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, { "policy_name": "special_warning_policy", @@ -240,6 +270,8 @@ def test_system_tables(start_cluster): "max_data_part_size": "1024000000", "move_factor": 0.1, "prefer_not_to_merge": 0, + "perform_ttl_move_on_insert": 1, + "load_balancing": "ROUND_ROBIN", }, ] From b6261104e700354de56c5e6e5c4a54e0ab25bcaa Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 6 Apr 2023 23:04:51 +0000 Subject: [PATCH 205/277] Fix bracketed-paste mode messing up password input in client reconnect --- src/Client/ClientBase.cpp | 17 +++++++++++++---- src/Client/LineReader.h | 3 +++ src/Client/ReplxxLineReader.cpp | 6 ++++++ src/Client/ReplxxLineReader.h | 1 + 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 120d273aa62..a5296a143e1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -2219,9 +2220,6 @@ void ClientBase::runInteractive() LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters); #endif - /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole. - lr.enableBracketedPaste(); - static const std::initializer_list> backslash_aliases = { { "\\l", "SHOW DATABASES" }, @@ -2239,7 +2237,18 @@ void ClientBase::runInteractive() do { - auto input = lr.readLine(prompt(), ":-] "); + String input; + { + /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole. + /// But keep it disabled outside of query input, because it breaks password input + /// (e.g. if we need to reconnect and show a password prompt). + /// (Alternatively, we could make the password input ignore the control sequences.) + lr.enableBracketedPaste(); + SCOPE_EXIT({ lr.disableBracketedPaste(); }); + + input = lr.readLine(prompt(), ":-] "); + } + if (input.empty()) break; diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index 321cf41b77e..df64a3a85a9 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -46,7 +46,10 @@ public: /// clickhouse-client so that without -m flag, one can still paste multiline queries, and /// possibly get better pasting performance. See https://cirw.in/blog/bracketed-paste for /// more details. + /// These methods (if implemented) emit the control characters immediately, without waiting + /// for the next readLine() call. 
virtual void enableBracketedPaste() {} + virtual void disableBracketedPaste() {} protected: enum InputStatus diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 1979b37a94b..180be77ca1c 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -519,4 +519,10 @@ void ReplxxLineReader::enableBracketedPaste() rx.enable_bracketed_paste(); } +void ReplxxLineReader::disableBracketedPaste() +{ + bracketed_paste_enabled = false; + rx.disable_bracketed_paste(); +} + } diff --git a/src/Client/ReplxxLineReader.h b/src/Client/ReplxxLineReader.h index d36a1d0f42c..5cb8e48eb86 100644 --- a/src/Client/ReplxxLineReader.h +++ b/src/Client/ReplxxLineReader.h @@ -19,6 +19,7 @@ public: ~ReplxxLineReader() override; void enableBracketedPaste() override; + void disableBracketedPaste() override; /// If highlight is on, we will set a flag to denote whether the last token is a delimiter. /// This is useful to determine the behavior of key when multiline is enabled. From 18a2eb355e9eae7fbac6cb6b9b8a64505584c452 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Apr 2023 20:16:25 +0200 Subject: [PATCH 206/277] fix a bug in sync replica --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 4 +++- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index c8ce55f9600..623210ae04c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1147,7 +1147,8 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( *it, /* is_successful = */ false, min_unprocessed_insert_time_changed, max_processed_insert_time_changed, lock); - (*it)->removed_by_other_entry = true; + LogEntryPtr removing_entry = std::move(*it); /// Make it live a bit longer + removing_entry->removed_by_other_entry = true; it = queue.erase(it); notifySubscribers(queue.size(), &znode_name); ++removed_entries; @@ -2491,6 +2492,7 @@ ReplicatedMergeTreeQueue::addSubscriber(ReplicatedMergeTreeQueue::SubscriberCall || std::find(lightweight_entries.begin(), lightweight_entries.end(), entry->type) != lightweight_entries.end()) out_entry_names.insert(entry->znode_name); } + LOG_TEST(log, "Waiting for {} entries to be processed: {}", out_entry_names.size(), fmt::join(out_entry_names, ", ")); } auto it = subscribers.emplace(subscribers.end(), std::move(callback)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cbfe3f8cab2..734da8b85a3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7616,7 +7616,6 @@ bool StorageReplicatedMergeTree::waitForProcessingQueue(UInt64 max_wait_millisec if (removed_log_entry_id) wait_for_ids.erase(*removed_log_entry_id); - chassert(new_queue_size || wait_for_ids.empty()); if (wait_for_ids.empty()) target_entry_event.set(); }; From 9929185eab1a11e0ee7f937db4c93ef871d74ba5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Apr 2023 20:31:03 +0200 Subject: [PATCH 207/277] fix REPLACE_RANGE --- src/Storages/StorageReplicatedMergeTree.cpp | 62 ++++++++++++--------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 734da8b85a3..e538b4fbe6c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp 
+++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2209,35 +2209,43 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) /// Check that we could cover whole range for (PartDescriptionPtr & part_desc : parts_to_add) { - if (adding_parts_active_set.getContainingPart(part_desc->new_part_info).empty()) + if (!adding_parts_active_set.getContainingPart(part_desc->new_part_info).empty()) + continue; + + MergeTreePartInfo covering_drop_range; + if (queue.isGoingToBeDropped(part_desc->new_part_info, &covering_drop_range)) { - /// We should enqueue missing part for check, so it will be replaced with empty one (if needed) - /// and we will be able to execute this REPLACE_RANGE. - /// However, it's quite dangerous, because part may appear in source table. - /// So we enqueue it for check only if no replicas of source table have part either. - bool need_check = true; - if (auto * replicated_src_table = typeid_cast(source_table.get())) - { - String src_replica = replicated_src_table->findReplicaHavingPart(part_desc->src_part_name, false); - if (!src_replica.empty()) - { - LOG_DEBUG(log, "Found part {} on replica {} of source table, will not check part {} required for {}", - part_desc->src_part_name, src_replica, part_desc->new_part_name, entry.znode_name); - need_check = false; - } - } - - if (need_check) - { - LOG_DEBUG(log, "Will check part {} required for {}, because no replicas have it (including replicas of source table)", - part_desc->new_part_name, entry.znode_name); - enqueuePartForCheck(part_desc->new_part_name); - } - - throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, - "Not found part {} (or part covering it) neither source table neither remote replicas", - part_desc->new_part_name); + LOG_WARNING(log, "Will not add part {} (while replacing {}) because it's going to be dropped (DROP_RANGE: {})", + part_desc->new_part_name, entry_replace.drop_range_part_name, covering_drop_range.getPartNameForLogs()); + continue; } + + /// We should enqueue missing part for check, so it will be replaced with empty one (if needed) + /// and we will be able to execute this REPLACE_RANGE. + /// However, it's quite dangerous, because part may appear in source table. + /// So we enqueue it for check only if no replicas of source table have part either. 
+ bool need_check = true; + if (auto * replicated_src_table = typeid_cast<StorageReplicatedMergeTree *>(source_table.get())) + { + String src_replica = replicated_src_table->findReplicaHavingPart(part_desc->src_part_name, false); + if (!src_replica.empty()) + { + LOG_DEBUG(log, "Found part {} on replica {} of source table, will not check part {} required for {}", + part_desc->src_part_name, src_replica, part_desc->new_part_name, entry.znode_name); + need_check = false; + } + } + + if (need_check) + { + LOG_DEBUG(log, "Will check part {} required for {}, because no replicas have it (including replicas of source table)", + part_desc->new_part_name, entry.znode_name); + enqueuePartForCheck(part_desc->new_part_name); + } + + throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, + "Part {} (or a part covering it) was not found in the source table or on remote replicas", + part_desc->new_part_name); } /// Filter covered parts From 8fdc2b33260ef8c02ae79b4d5d98e83df3f233e7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 7 Apr 2023 20:06:11 +0000 Subject: [PATCH 208/277] Perf test --- tests/performance/reading_from_file.xml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/performance/reading_from_file.xml diff --git a/tests/performance/reading_from_file.xml b/tests/performance/reading_from_file.xml new file mode 100644 index 00000000000..992f1e8acae --- /dev/null +++ b/tests/performance/reading_from_file.xml @@ -0,0 +1,9 @@ +<test> + +<fill_query>INSERT INTO function file(reading_from_file.parquet) SELECT URL FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert=1</fill_query> + +<query>SELECT sum(length(base58Encode(URL))) FROM file(reading_from_file.parquet) FORMAT Null</query> + +<drop_query>INSERT INTO FUNCTION file(reading_from_file.parquet) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1</drop_query> + +</test> From e39ee3f0af5a2e6e8ab9888ed8e9020752870bbf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Apr 2023 22:54:49 +0200 Subject: [PATCH 209/277] suppress a bug in filesystem cache --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 85420cabb8d..5ed4987a6d5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -34,6 +34,8 @@ #include #include +#include <Disks/IO/CachedOnDiskReadBufferFromFile.h> + namespace CurrentMetrics { @@ -1525,6 +1527,10 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME); auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, ReadSettings().adjustBufferSize(file_size), file_size, std::nullopt); + /// FIXME https://github.com/ClickHouse/ClickHouse/issues/48465 + if (dynamic_cast<CachedOnDiskReadBufferFromFile *>(buf.get())) + return true; + readStringUntilEOF(content, *buf); ReadBufferFromString str_buf{content}; VersionMetadata file; From bf3a08dc98842288d1b433efcfd614103fc8e1cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 7 Apr 2023 22:58:08 +0200 Subject: [PATCH 210/277] Fix 02477_projection_materialize_and_zero_copy flakiness (due to index granularity randomization) Signed-off-by: Azat Khuzhin --- .../0_stateless/02477_projection_materialize_and_zero_copy.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql index d4c24b31da2..1845919890c 100644 ---
a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql +++ b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS t; create table t (c1 Int64, c2 String, c3 DateTime, c4 Int8, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 Int8, c14 Int64, c15 String, c16 String, c17 String, c18 Int64, c19 Int64, c20 Int64) engine ReplicatedMergeTree('/clickhouse/test/{database}/test_02477', '1') order by c18 -SETTINGS allow_remote_fs_zero_copy_replication=1; +SETTINGS allow_remote_fs_zero_copy_replication=1, index_granularity=8092, index_granularity_bytes='10Mi'; insert into t (c1, c18) select number, -number from numbers(2000000); From 3633f0b2f3d479b884d1e70c2d4bcb0e817d2d19 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Apr 2023 23:50:09 +0200 Subject: [PATCH 211/277] fix --- .github/workflows/backport_branches.yml | 7 +++++++ .github/workflows/master.yml | 7 +++++++ .github/workflows/release_branches.yml | 7 +++++++ docs/en/development/developer-instruction.md | 2 +- docs/ru/development/developer-instruction.md | 2 +- 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 0d81a7b303c..d69168b01ee 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -349,6 +349,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index ecd5b85d320..1182481c897 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -487,6 +487,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 1282dbef50b..21284815583 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -406,6 +406,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ea98b2da5e6..6bcdadeb1eb 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -39,7 +39,7 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files): diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index c208439678a..7294bc2ae87 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -41,7 +41,7 @@ ClickHouse does not work and does not build on 32-bit Run in the terminal: - git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse Or (if you want to use sparse checkout for submodules): From ec60a1069428e7a127e1c6d440711abd26c980e7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Apr 2023 00:54:05 +0200 Subject: [PATCH 212/277] Better exception message for ZSTD --- src/IO/ZstdDeflatingAppendableWriteBuffer.cpp | 8 ++++---- src/IO/ZstdDeflatingWriteBuffer.cpp | 2 +- src/IO/ZstdInflatingReadBuffer.cpp | 15 +++++++++++---- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp index f8c4d0e2bac..be739c0e654 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp @@ -23,11 +23,11 @@ ZstdDeflatingAppendableWriteBuffer::ZstdDeflatingAppendableWriteBuffer( { cctx = ZSTD_createCCtx(); if (cctx == nullptr) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD stream encoder init failed: ZSTD version: {}", ZSTD_VERSION_STRING); size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level); if (ZSTD_isError(ret)) throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, - "zstd stream encoder option setting failed: error code: {}; zstd version: {}", + "ZSTD stream encoder option setting failed: error code: {}; ZSTD version: {}", ret, ZSTD_VERSION_STRING); input = {nullptr, 0, 0}; @@ -64,7 +64,7 @@ void ZstdDeflatingAppendableWriteBuffer::nextImpl() if (ZSTD_isError(compression_result)) throw Exception( ErrorCodes::ZSTD_ENCODER_FAILED, - "Zstd stream encoding failed: error code: {}; zstd version: {}", + "ZSTD stream encoding failed: error code: {}; ZSTD version: {}", ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); first_write = false; @@ -138,7 +138,7 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeBefore() { if (ZSTD_isError(remaining)) throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, - "Zstd stream encoder end failed: error: '{}' zstd version: {}", + "ZSTD stream encoder end failed: error:
'{}' ZSTD version: {}", ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING); remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end); diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index a12b59b80f5..c6d2ffc39f9 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -63,7 +63,7 @@ void ZstdDeflatingWriteBuffer::nextImpl() if (ZSTD_isError(compression_result)) throw Exception( ErrorCodes::ZSTD_ENCODER_FAILED, - "Zstd stream encoding failed: error: '{}'; zstd version: {}", + "ZSTD stream encoding failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); out->position() = out->buffer().begin() + output.pos; diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index 6956bdb6710..6f5c8b4dc71 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB @@ -56,11 +57,17 @@ bool ZstdInflatingReadBuffer::nextImpl() /// Decompress data and check errors. size_t ret = ZSTD_decompressStream(dctx, &output, &input); - if (ZSTD_isError(ret)) + if (ZSTD_getErrorCode(ret)) + { throw Exception( - ErrorCodes::ZSTD_DECODER_FAILED, - "Zstd stream encoding failed: error '{}'; zstd version: {}", - ZSTD_getErrorName(ret), ZSTD_VERSION_STRING); + ErrorCodes::ZSTD_DECODER_FAILED, + "ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}", + ZSTD_getErrorName(ret), + ZSTD_error_frameParameter_windowTooLarge == ret + ? ". You can increase the maximum window size with the 'zstd_window_log_max' setting in ClickHouse. Example: 'SET zstd_window_log_max = 31'" + : "", + ZSTD_VERSION_STRING); + } /// Check that something has changed after decompress (input or output position) assert(in->eof() || output.pos > 0 || in->position() < in->buffer().begin() + input.pos); From 2b00c8cb9b4b62f4b5d251e7dcb3b54e33a43754 Mon Sep 17 00:00:00 2001 From: Artem Pershin Date: Sat, 8 Apr 2023 18:39:04 +0300 Subject: [PATCH 213/277] Fix typo in word acquired, in comment for lockForAlter method --- src/Storages/IStorage.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 426d64f41ee..351e147e6cd 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -275,7 +275,7 @@ public: /// acquiring the lock instead of raising a TABLE_IS_DROPPED exception TableLockHolder tryLockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout); - /// Lock table for alter. This lock must be acuqired in ALTER queries to be + /// Lock table for alter. This lock must be acquired in ALTER queries to be /// sure, that we execute only one simultaneous alter. Doesn't affect share lock. 
using AlterLockHolder = std::unique_lock<std::timed_mutex>; AlterLockHolder lockForAlter(const std::chrono::milliseconds & acquire_timeout); From 0dc3193e6bf3a29a50242b1743ef1a652570adcc Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Apr 2023 17:29:08 +0000 Subject: [PATCH 214/277] Fix 02293_formats_json_columns --- .../02293_formats_json_columns.reference | 14 +++++++------- .../0_stateless/02293_formats_json_columns.sh | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/02293_formats_json_columns.reference b/tests/queries/0_stateless/02293_formats_json_columns.reference index f59f02ad42b..d3fb006a73f 100644 --- a/tests/queries/0_stateless/02293_formats_json_columns.reference +++ b/tests/queries/0_stateless/02293_formats_json_columns.reference @@ -86,18 +86,18 @@ d Nullable(String) \N \N 3 \N \N \N \N String OK -3 -2 1 +2 +3 c1 Nullable(Int64) c2 Nullable(Int64) c3 Nullable(String) 1 1 \N -2 2 \N -3 3 \N -1 \N \N -2 \N \N -3 \N \N 1 2 String +1 \N \N +2 2 \N +2 \N \N +3 3 \N +3 \N \N OK OK diff --git a/tests/queries/0_stateless/02293_formats_json_columns.sh b/tests/queries/0_stateless/02293_formats_json_columns.sh index 7a21f8d9bab..0aaf2abfc45 100755 --- a/tests/queries/0_stateless/02293_formats_json_columns.sh +++ b/tests/queries/0_stateless/02293_formats_json_columns.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$(clickhouse client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/data_02293 @@ -17,13 +17,13 @@ echo "JSONColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONColumns" > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns) order by a" echo "JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" $CLICKHOUSE_CLIENT -q "select * from test_02293 order by a format JSONCompactColumns" > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3" echo "JSONColumnsWithMetadata" $CLICKHOUSE_CLIENT -q "select sum(a) as sum, avg(a) as avg from test_02293 group by a % 4 with totals order by tuple(sum, avg) format JSONColumnsWithMetadata" --extremes=1 | grep -v "elapsed" @@ -49,9 +49,9 @@ echo ' ' > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') settings input_format_skip_unknown_fields=1" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns)
order by b, a, c, d" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONColumns, 'a UInt32, t String') order by a, t settings input_format_skip_unknown_fields=1" echo ' [ @@ -75,8 +75,8 @@ echo ' ' > $DATA_FILE $CLICKHOUSE_CLIENT -q "desc file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns)" -$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32')" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns) order by c1, c2, c3" +$CLICKHOUSE_CLIENT -q "select * from file(data_02293, JSONCompactColumns, 'a UInt32, t UInt32') order by a, t" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' echo ' { From 32ac23803f2904187b0a30cdbe7529012d1b12cf Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Apr 2023 19:02:18 +0000 Subject: [PATCH 215/277] Fix test which can become flaky due to file() parallelization --- ...decompression_with_escape_sequence_at_the_end_of_buffer.sh | 4 ++-- tests/queries/0_stateless/02051_symlinks_to_user_files.sh | 2 +- tests/queries/0_stateless/02211_jsonl_format_extension.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index 47cf6e06b48..2f8d8f06dee 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -15,8 +15,8 @@ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; -select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null; set input_format_parallel_parsing = 1; -select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; +select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') order by foo limit 30 format Null; " diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh index dfdc71e0f0b..22d6d2938cd 100755 --- a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -28,5 +28,5 @@ function cleanup() trap cleanup EXIT ${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; -${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String') order by a"; diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.sql b/tests/queries/0_stateless/02211_jsonl_format_extension.sql index 08fff5a11f5..907cdd5cf92 100644 --- a/tests/queries/0_stateless/02211_jsonl_format_extension.sql +++ b/tests/queries/0_stateless/02211_jsonl_format_extension.sql @@ -1,3 
+1,3 @@ -- Tags: no-fasttest insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10); -select * from file('data.jsonl'); +select * from file('data.jsonl') order by x; From 0fbb05bf55ce84d1f998b33816f9a21c22456386 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Apr 2023 19:15:18 +0000 Subject: [PATCH 216/277] Fix 02286_mysql_dump_input_format --- .../02286_mysql_dump_input_format.reference | 2 +- .../02286_mysql_dump_input_format.sh | 92 +++++++++---------- 2 files changed, 47 insertions(+), 47 deletions(-) diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.reference b/tests/queries/0_stateless/02286_mysql_dump_input_format.reference index a736358b9b7..25a78651e33 100644 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.reference +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.reference @@ -12,13 +12,13 @@ 4 \N 5 \N 6 7 +7 6 \N 1 \N 2 \N 3 \N 3 \N 4 \N 5 -7 6 OK 1 2 diff --git a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh index 891734e9ad3..1139c1ea68c 100755 --- a/tests/queries/0_stateless/02286_mysql_dump_input_format.sh +++ b/tests/queries/0_stateless/02286_mysql_dump_input_format.sh @@ -9,140 +9,140 @@ USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonex cp $CURDIR/data_mysql_dump/dump*.sql $USER_FILES_PATH -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)')" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 0" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') settings input_format_mysql_dump_map_column_names = 1" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') settings input_format_skip_unknown_fields = 1" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), y Nullable(Int32)') order by x, y" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'a Nullable(Int32), b Nullable(Int32)') order by a, b settings input_format_mysql_dump_map_column_names = 0" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'y Nullable(Int32), x Nullable(Int32)') order by y, x settings input_format_mysql_dump_map_column_names = 1" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 0" 2>&1 | grep -F -q 'INCORRECT_DATA' && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32), z String') order by x, z settings input_format_skip_unknown_fields = 1" echo "dump1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, 
MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump1.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'Cannot extract table structure' && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from file(dump1.sql, MySQLDump, 'x Nullable(Int32)') settings input_format_mysql_dump_table_name='test 3'" 2>&1 | grep -F -q 'EMPTY_DATA_PASSED' && echo 'OK' || echo 'FAIL' echo "dump2" $CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump3" $CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump3.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" echo "dump4" $CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump4.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump5" $CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump5.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump6" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) 
settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump6.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump7" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump7.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump8" $CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump8.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump8.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump2.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump9" $CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump9.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump10" $CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, 
MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump10.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump11" $CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump11.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" echo "dump12" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump12.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump13" $CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'" -$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits'" +$CLICKHOUSE_CLIENT -q "select * from file(dump13.sql, MySQLDump) settings input_format_mysql_dump_table_name='fruits', max_threads=1" echo "dump14" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings 
input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump14.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" echo "dump15" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump)" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump)" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test'" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2', max_threads=1" $CLICKHOUSE_CLIENT -q "desc file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" -$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3'" +$CLICKHOUSE_CLIENT -q "select * from file(dump15.sql, MySQLDump) settings input_format_mysql_dump_table_name='test 3', max_threads=1" rm $USER_FILES_PATH/dump*.sql From bcb913e7b362191f36895617fbb97b923264c9c7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Apr 2023 20:13:24 +0000 Subject: [PATCH 217/277] A try to prevent possible tests flakyness due to file() parallelization --- .../0_stateless/02293_arrow_dictionary_indexes.sql | 2 +- .../0_stateless/02314_csv_tsv_skip_first_lines.sql | 4 ++-- .../queries/0_stateless/02383_arrow_dict_special_cases.sh | 8 ++++---- .../0_stateless/02455_one_row_from_csv_memory_usage.sh | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql index 3ff6a5ffbb3..ec68d1a4443 100644 --- a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -1,3 +1,3 @@ -- Tags: no-fasttest insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; -select * from file(02293_data.arrow); +select * from file(02293_data.arrow) settings max_threads=1; diff --git a/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql b/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql index 4a0cef35310..a569b6e9b84 100644 --- a/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql +++ b/tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql @@ -3,10 +3,10 @@ insert into function file(data_02314.csv) 
select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1; insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5); desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5; -select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5; +select * from file(data_02314.csv) order by c1 settings input_format_csv_skip_first_lines=5; insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1; insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5); desc file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5; -select * from file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5; +select * from file(data_02314.tsv) order by c1 settings input_format_tsv_skip_first_lines=5; diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh index 86f95873f14..bab0b57f116 100755 --- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh +++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh @@ -13,17 +13,17 @@ cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/ cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/ $CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow') settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow') settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow') settings max_threads=1" $CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')" $CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL $CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow') settings max_threads=1" rm -rf $USER_FILES_PATH/test_02383 diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 7506e78455d..9076d7a8a5f 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -7,5 +7,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/ -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') LIMIT 1 settings max_memory_usage=100000000" 
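+# Editorial note (not in the original patch): file() can now read with several threads, so without
+# a total ORDER BY the row picked by LIMIT 1 is not deterministic; ordering by every column pins
+# the result while still exercising the max_memory_usage limits below.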
+${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000" From b319271d59df6544ab356b465ea1a69ade01e7b9 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Sat, 8 Apr 2023 22:50:24 +0200 Subject: [PATCH 218/277] Remove misleading comment It is no longer "significant" after `NOEXCEPT_SCOPE` macro rework #39229 --- .../MergeTree/MergeTreeBackgroundExecutor.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index 65a796456bf..f2efbb2ba8d 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -217,13 +217,10 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) if (item->is_currently_deleting) { - /// This is significant to order the destructors. - { - NOEXCEPT_SCOPE({ - ALLOW_ALLOCATIONS_IN_SCOPE; - item->task.reset(); - }); - } + NOEXCEPT_SCOPE({ + ALLOW_ALLOCATIONS_IN_SCOPE; + item->task.reset(); + }); item->is_done.set(); item = nullptr; return; From d80364ffa0bebd86e25dcc89bf3be325da26830a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 8 Apr 2023 22:51:12 +0000 Subject: [PATCH 219/277] Fix tests: truncate output file on insert --- tests/queries/0_stateless/02187_msg_pack_uuid.sh | 6 +++--- tests/queries/0_stateless/02211_jsonl_format_extension.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.sh b/tests/queries/0_stateless/02187_msg_pack_uuid.sh index 9be92d66790..f04ef09a8c8 100755 --- a/tests/queries/0_stateless/02187_msg_pack_uuid.sh +++ b/tests/queries/0_stateless/02187_msg_pack_uuid.sh @@ -5,13 +5,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'" +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str', engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')" -$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'" +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin', engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')" -$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'" +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext', engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')" $CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1" diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.sql b/tests/queries/0_stateless/02211_jsonl_format_extension.sql index 907cdd5cf92..a95b19f4e3b 100644 --- a/tests/queries/0_stateless/02211_jsonl_format_extension.sql +++ b/tests/queries/0_stateless/02211_jsonl_format_extension.sql @@ -1,3 +1,3 @@ -- Tags: no-fasttest -insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10); +insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10) SETTINGS engine_file_truncate_on_insert=1; select * from file('data.jsonl') order by x; From 09ea79aaf7955a9db36126e9a5f18e93b98704ec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Apr 2023 03:04:26 +0200 Subject: [PATCH 220/277] Add support for {server_uuid} macro --- programs/keeper/Keeper.cpp | 1 - programs/server/Server.cpp | 2 +- src/Common/Exception.h | 1 - src/Common/Macros.cpp | 12 +++++++++++- src/Daemon/BaseDaemon.h | 1 - src/Dictionaries/MongoDBDictionarySource.cpp | 1 - src/Disks/ObjectStorages/S3/registerDiskS3.cpp | 9 +++------ src/IO/HTTPCommon.cpp | 2 -- src/IO/ReadWriteBufferFromHTTP.h | 1 - src/Processors/Sources/MongoDBSource.cpp | 1 - src/Storages/StorageMongoDB.cpp | 1 - 11 files changed, 15 insertions(+), 17 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 266b363eb47..3853c955171 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9ef9f704f61..164e1ce14e5 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -981,7 +981,7 @@ try 
StatusFile status{path / "status", StatusFile::write_full_info}; - DB::ServerUUID::load(path / "uuid", log); + ServerUUID::load(path / "uuid", log); /// Try to increase limit on number of open files. { diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 8e50c1114f4..170e0d32b3c 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index e5d4be446c1..18e65bbaede 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -1,8 +1,9 @@ #include #include #include -#include #include +#include +#include namespace DB @@ -105,6 +106,15 @@ String Macros::expand(const String & s, res += toString(info.table_id.uuid); info.expanded_uuid = true; } + else if (macro_name == "server_uuid") + { + auto uuid = ServerUUID::get(); + if (UUIDHelpers::Nil == uuid) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Macro {server_uuid} expanded to zero, which means the UUID is not initialized (most likely it's not a server application)"); + res += toString(uuid); + info.expanded_other = true; + } else if (info.shard && macro_name == "shard") { res += *info.shard; diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index d28f9403c16..f90f403364b 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index a9555a94304..922e1e71bbb 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -67,7 +67,6 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) #include #include #include -#include // only after poco // naming conflict: diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 1c192a0d89c..70f39d893f7 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -8,7 +8,6 @@ #if USE_AWS_S3 -#include #include #include @@ -19,9 +18,7 @@ #include #include #include -#include -#include #include #include @@ -87,10 +84,10 @@ public: private: static String getServerUUID() { - DB::UUID server_uuid = DB::ServerUUID::get(); - if (server_uuid == DB::UUIDHelpers::Nil) + UUID server_uuid = ServerUUID::get(); + if (server_uuid == UUIDHelpers::Nil) throw Exception(ErrorCodes::LOGICAL_ERROR, "Server UUID is not initialized"); - return DB::toString(server_uuid); + return toString(server_uuid); } }; diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 551ce797757..9eabe3573f4 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -7,8 +7,6 @@ #include #include -#include - #include "config.h" #if USE_SSL diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 784110f735e..28f7a7f600b 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include "config.h" diff --git a/src/Processors/Sources/MongoDBSource.cpp b/src/Processors/Sources/MongoDBSource.cpp index a8bfefdf8a6..9e50cdcf6ab 100644 --- a/src/Processors/Sources/MongoDBSource.cpp +++ b/src/Processors/Sources/MongoDBSource.cpp @@ -15,7 +15,6 @@ #include #include #include -#include // only after poco // naming conflict: diff --git a/src/Storages/StorageMongoDB.cpp 
b/src/Storages/StorageMongoDB.cpp index 0bad4ab8bf0..63b8c2d00a1 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include From 6292a08c9c98dd85d28e5a7f10759a858c752421 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Apr 2023 03:25:38 +0200 Subject: [PATCH 221/277] Add a test --- src/Common/Macros.cpp | 8 +++++--- .../0_stateless/02711_server_uuid_macro.reference | 1 + .../0_stateless/02711_server_uuid_macro.sql | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02711_server_uuid_macro.reference create mode 100644 tests/queries/0_stateless/02711_server_uuid_macro.sql diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index 18e65bbaede..f43fed6c499 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -12,6 +12,8 @@ namespace DB namespace ErrorCodes { extern const int SYNTAX_ERROR; + extern const int BAD_ARGUMENTS; + extern const int NO_ELEMENTS_IN_CONFIG; } Macros::Macros(const Poco::Util::AbstractConfiguration & config, const String & root_key, Poco::Logger * log) @@ -96,7 +98,7 @@ String Macros::expand(const String & s, else if (macro_name == "uuid" && !info.expand_special_macros_only) { if (info.table_id.uuid == UUIDHelpers::Nil) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Macro 'uuid' and empty arguments of ReplicatedMergeTree " + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Macro 'uuid' and empty arguments of ReplicatedMergeTree " "are supported only for ON CLUSTER queries with Atomic database engine"); /// For ON CLUSTER queries we don't want to require all macros definitions in initiator's config. /// However, initiator must check that for cross-replication cluster zookeeper_path does not contain {uuid} macro. @@ -135,7 +137,7 @@ String Macros::expand(const String & s, info.has_unknown = true; } else - throw Exception(ErrorCodes::SYNTAX_ERROR, "No macro '{}' in config while processing substitutions in " + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No macro '{}' in config while processing substitutions in " "'{}' at '{}' or macro is not supported here", macro_name, s, toString(begin)); pos = end + 1; @@ -152,7 +154,7 @@ String Macros::getValue(const String & key) const { if (auto it = macros.find(key); it != macros.end()) return it->second; - throw Exception(ErrorCodes::SYNTAX_ERROR, "No macro {} in config", key); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No macro {} in config", key); } diff --git a/tests/queries/0_stateless/02711_server_uuid_macro.reference b/tests/queries/0_stateless/02711_server_uuid_macro.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02711_server_uuid_macro.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02711_server_uuid_macro.sql b/tests/queries/0_stateless/02711_server_uuid_macro.sql new file mode 100644 index 00000000000..f708156c0ae --- /dev/null +++ b/tests/queries/0_stateless/02711_server_uuid_macro.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test; + +-- You can create a table with the {server_uuid} substituted. +CREATE TABLE test (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', 'replica-{server_uuid}') ORDER BY x; + +-- The server UUID is correctly substituted. 
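+-- (Editorial illustration, not part of the original test: if the server UUID were, say,
+-- '123e4567-e89b-12d3-a456-426614174000', engine_full would contain
+-- 'replica-123e4567-e89b-12d3-a456-426614174000', which the LIKE check below verifies.)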
+SELECT engine_full LIKE ('%replica-' || serverUUID()::String || '%') FROM system.tables WHERE database = currentDatabase() AND name = 'test'; + +-- An attempt to create a second table with the same UUID results in error. +CREATE TABLE test2 (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', 'replica-{server_uuid}') ORDER BY x; -- { serverError REPLICA_ALREADY_EXISTS } + +-- The macro {server_uuid} is special, not a configuration-type macro. It's normal that it is inaccessible with the getMacro function. +SELECT getMacro('server_uuid'); -- { serverError NO_ELEMENTS_IN_CONFIG } + +DROP TABLE test NO DELAY; From d23cd018ec8196184a6a275004559202bef4b9ef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Apr 2023 07:46:27 +0200 Subject: [PATCH 222/277] Update test --- .../0_stateless/01148_zookeeper_path_macros_unfolding.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql index 7234cee96e0..505c406c2cc 100644 --- a/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql +++ b/tests/queries/0_stateless/01148_zookeeper_path_macros_unfolding.sql @@ -12,7 +12,7 @@ DETACH TABLE rmt1; ATTACH TABLE rmt1; SHOW CREATE TABLE rmt1; -CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}{uuid}', '{default_name_test}') ORDER BY n; -- { serverError 62 } +CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}{uuid}', '{default_name_test}') ORDER BY n; -- { serverError 36 } CREATE TABLE rmt (n UInt64, s String) ENGINE = ReplicatedMergeTree('{default_path_test}test_01148', '{default_name_test}') ORDER BY n; SHOW CREATE TABLE rmt; RENAME TABLE rmt TO rmt2; -- { serverError 48 } @@ -24,7 +24,7 @@ SET distributed_ddl_output_mode='none'; DROP DATABASE IF EXISTS test_01148_atomic; CREATE DATABASE test_01148_atomic ENGINE=Atomic; CREATE TABLE test_01148_atomic.rmt2 ON CLUSTER test_shard_localhost (n int, PRIMARY KEY n) ENGINE=ReplicatedMergeTree; -CREATE TABLE test_01148_atomic.rmt3 AS test_01148_atomic.rmt2; -- { serverError 62 } +CREATE TABLE test_01148_atomic.rmt3 AS test_01148_atomic.rmt2; -- { serverError 36 } CREATE TABLE test_01148_atomic.rmt4 ON CLUSTER test_shard_localhost AS test_01148_atomic.rmt2; SHOW CREATE TABLE test_01148_atomic.rmt2; RENAME TABLE test_01148_atomic.rmt4 to test_01148_atomic.rmt3; From c3c07c05ea23e8f9f175c76cc7accbcdf14b50f9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Apr 2023 09:36:20 +0300 Subject: [PATCH 223/277] Update 02207_allow_plaintext_and_no_password.sh --- .../0_stateless/02207_allow_plaintext_and_no_password.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh index 693f1d817e3..0345a0e6394 100755 --- a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh @@ -1,11 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-fasttest -# Tag no-tsan: requires jemalloc to track small allocations -# Tag no-asan: requires jemalloc to track small allocations -# Tag no-ubsan: requires jemalloc to track small allocations -# Tag no-msan: requires jemalloc to track small allocations - - +# Tags: no-parallel, no-fasttest CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 10883d8af5b407c5bbcf3b49bc0db76f558afa14 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Sun, 9 Apr 2023 12:52:23 +0000 Subject: [PATCH 224/277] Allow commas with subqueries, add comments --- src/Parsers/ExpressionListParsers.cpp | 96 +++++++++++-------- .../02676_trailing_commas.reference | 3 +- .../0_stateless/02676_trailing_commas.sql | 3 +- 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 6008e89d038..0069821c949 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -779,6 +779,41 @@ protected: }; +struct ParserExpressionImpl +{ + static std::vector> operators_table; + static std::vector> unary_operators_table; + static const char * overlapping_operators_to_skip[]; + + static Operator finish_between_operator; + + ParserCompoundIdentifier identifier_parser{false, true}; + ParserNumber number_parser; + ParserAsterisk asterisk_parser; + ParserLiteral literal_parser; + ParserTupleOfLiterals tuple_literal_parser; + ParserArrayOfLiterals array_literal_parser; + ParserSubstitution substitution_parser; + ParserMySQLGlobalVariable mysql_global_variable_parser; + + ParserKeyword any_parser{"ANY"}; + ParserKeyword all_parser{"ALL"}; + + // Recursion + ParserQualifiedAsterisk qualified_asterisk_parser; + ParserColumnsMatcher columns_matcher_parser; + ParserQualifiedColumnsMatcher qualified_columns_matcher_parser; + ParserSubquery subquery_parser; + + bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); + + using Layers = std::vector>; + + Action tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); + Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); +}; + + class ExpressionLayer : public Layer { public: @@ -801,7 +836,7 @@ public: return Layer::getResultImpl(node); } - bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & /*action*/) override { if (pos->type == TokenType::Comma) { @@ -814,27 +849,40 @@ public: /// - SELECT a, b, c, FROM table /// - SELECT 1, - /// For this purpose we eliminate the following cases: + /// For this purpose we need to eliminate the following cases: /// 1. WITH 1 AS from SELECT 2, from /// 2. SELECT to, from FROM table /// 3. SELECT to, from AS alias FROM table - /// 4. SELECT to, from + to FROM table + /// 4. 
SELECT to, from + to, from IN [1,2,3], FROM table + Expected test_expected; auto test_pos = pos; ++test_pos; + /// End of query if (test_pos.isValid() && test_pos->type != TokenType::Semicolon) { - if (!ParserKeyword("FROM").ignore(test_pos, expected)) + /// If we can't parse FROM then return + if (!ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; - if (ParserKeyword("FROM").ignore(test_pos, expected)) + /// If we parse a second FROM then the first one was a name of a column + if (ParserKeyword("FROM").ignore(test_pos, test_expected)) return true; - if (ParserAlias(false).ignore(test_pos, expected)) + /// If we parse an explicit alias to FROM, then it was a name of a column + if (ParserAlias(false).ignore(test_pos, test_expected)) return true; - if (!ParserIdentifier(true).ignore(test_pos, expected)) + /// If we parse an operator after FROM then it was a name of a column + auto cur_op = ParserExpressionImpl::operators_table.begin(); + for (; cur_op != ParserExpressionImpl::operators_table.end(); ++cur_op) + { + if (parseOperator(test_pos, cur_op->first, test_expected)) + break; + } + + if (cur_op != ParserExpressionImpl::operators_table.end()) return true; } @@ -2205,40 +2253,6 @@ bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expecte return true; } -struct ParserExpressionImpl -{ - static std::vector> operators_table; - static std::vector> unary_operators_table; - static const char * overlapping_operators_to_skip[]; - - static Operator finish_between_operator; - - ParserCompoundIdentifier identifier_parser{false, true}; - ParserNumber number_parser; - ParserAsterisk asterisk_parser; - ParserLiteral literal_parser; - ParserTupleOfLiterals tuple_literal_parser; - ParserArrayOfLiterals array_literal_parser; - ParserSubstitution substitution_parser; - ParserMySQLGlobalVariable mysql_global_variable_parser; - - ParserKeyword any_parser{"ANY"}; - ParserKeyword all_parser{"ALL"}; - - // Recursion - ParserQualifiedAsterisk qualified_asterisk_parser; - ParserColumnsMatcher columns_matcher_parser; - ParserQualifiedColumnsMatcher qualified_columns_matcher_parser; - ParserSubquery subquery_parser; - - bool parse(std::unique_ptr start, IParser::Pos & pos, ASTPtr & node, Expected & expected); - - using Layers = std::vector>; - - Action tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected); - Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected); -}; - bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { diff --git a/tests/queries/0_stateless/02676_trailing_commas.reference b/tests/queries/0_stateless/02676_trailing_commas.reference index 41ace3e47aa..76d173ca23e 100644 --- a/tests/queries/0_stateless/02676_trailing_commas.reference +++ b/tests/queries/0_stateless/02676_trailing_commas.reference @@ -1,4 +1,5 @@ 1 1 1 -1 2 +1 2 0 +1 diff --git a/tests/queries/0_stateless/02676_trailing_commas.sql b/tests/queries/0_stateless/02676_trailing_commas.sql index 5e2dafccb46..048405c4d20 100644 --- a/tests/queries/0_stateless/02676_trailing_commas.sql +++ b/tests/queries/0_stateless/02676_trailing_commas.sql @@ -1,4 +1,5 @@ SELECT 1,; SELECT 1, FROM numbers(1); WITH 1 as a SELECT a, FROM numbers(1); -WITH 1 as from SELECT from, from + from, FROM numbers(1); +WITH 1 as from SELECT from, from + from, from in [0], FROM numbers(1); +SELECT n, FROM (SELECT 1 AS n); From 3bc5c6423b2d522418e553f8050947343dbf6160 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Sun, 9 Apr 2023 16:54:21 +0200 
Subject: [PATCH 225/277] Update formats.md --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index de8357345db..5c59d8d74ca 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -78,7 +78,7 @@ The supported formats are: | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | | [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✗ | +| [LineAsString](#lineasstring) | ✔ | ✔ | | [Regexp](#data-format-regexp) | ✔ | ✗ | | [RawBLOB](#rawblob) | ✔ | ✔ | | [MsgPack](#msgpack) | ✔ | ✔ | From 286424be05c6734543b5157287d5359b80dbe385 Mon Sep 17 00:00:00 2001 From: caipengxiang <291458254@qq.com> Date: Sun, 9 Apr 2023 22:59:17 +0800 Subject: [PATCH 226/277] bugfix: compare Bits and sizeof(Arithmetic) * 8 --- base/base/wide_integer_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 7cdb527f9cf..ed4570d5e3f 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -155,13 +155,13 @@ struct common_type, Arithmetic> std::is_floating_point_v, Arithmetic, std::conditional_t< - sizeof(Arithmetic) < Bits * sizeof(long), + sizeof(Arithmetic) * 8 < Bits, wide::integer, std::conditional_t< - Bits * sizeof(long) < sizeof(Arithmetic), + Bits < sizeof(Arithmetic) * 8, Arithmetic, std::conditional_t< - Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v || std::is_signed_v), + Bits == sizeof(Arithmetic) * 8 && (std::is_same_v || std::is_signed_v), Arithmetic, wide::integer>>>>; }; From 553326c999d2febe1316ad372bab976f1f30043b Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 9 Apr 2023 15:13:40 +0000 Subject: [PATCH 227/277] add test --- .../0_stateless/01165_lost_part_empty_partition.reference | 2 ++ tests/queries/0_stateless/01165_lost_part_empty_partition.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference index e69de29bb2d..6ed281c757a 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.reference +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql index dc41b15118f..924798b0050 100644 --- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql +++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql @@ -10,6 +10,7 @@ insert into rmt1 values (now(), rand()); drop table rmt1; system sync replica rmt2; +select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; @@ -21,6 +22,7 @@ insert into rmt1 values (now(), rand()); drop table rmt1; system sync replica rmt2; +select lost_part_count from system.replicas where database = currentDatabase() and table = 'rmt2'; drop table rmt2; From 44e418987cdf12ad54bb9fb220947c9451de88b6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Apr 2023 22:26:49 +0300 Subject: [PATCH 228/277] Update tests/queries/0_stateless/02711_server_uuid_macro.sql Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- tests/queries/0_stateless/02711_server_uuid_macro.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/queries/0_stateless/02711_server_uuid_macro.sql b/tests/queries/0_stateless/02711_server_uuid_macro.sql index f708156c0ae..f10ed7f8f6f 100644 --- a/tests/queries/0_stateless/02711_server_uuid_macro.sql +++ b/tests/queries/0_stateless/02711_server_uuid_macro.sql @@ -1,13 +1,13 @@ DROP TABLE IF EXISTS test; -- You can create a table with the {server_uuid} substituted. -CREATE TABLE test (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', 'replica-{server_uuid}') ORDER BY x; +CREATE TABLE test (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test', 'replica-{server_uuid}') ORDER BY x; -- The server UUID is correctly substituted. SELECT engine_full LIKE ('%replica-' || serverUUID()::String || '%') FROM system.tables WHERE database = currentDatabase() AND name = 'test'; -- An attempt to create a second table with the same UUID results in error. -CREATE TABLE test2 (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test', 'replica-{server_uuid}') ORDER BY x; -- { serverError REPLICA_ALREADY_EXISTS } +CREATE TABLE test2 (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test', 'replica-{server_uuid}') ORDER BY x; -- { serverError REPLICA_ALREADY_EXISTS } -- The macro {server_uuid} is special, not a configuration-type macro. It's normal that it is inaccessible with the getMacro function. SELECT getMacro('server_uuid'); -- { serverError NO_ELEMENTS_IN_CONFIG } From 6fe6e1fea5f31dc9f2684ae29df2e07a89d8c6f0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 9 Apr 2023 19:55:45 +0000 Subject: [PATCH 229/277] Adopt tests to run in parallel or mark as no-parallel (for flaky check) --- .../02293_arrow_dictionary_indexes.sql | 2 +- .../02383_arrow_dict_special_cases.sh | 32 +++++++++---------- .../02455_one_row_from_csv_memory_usage.sh | 11 +++++-- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql index ec68d1a4443..3ea229a1152 100644 --- a/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql +++ b/tests/queries/0_stateless/02293_arrow_dictionary_indexes.sql @@ -1,3 +1,3 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into function file(02293_data.arrow) select toLowCardinality(toString(number)) from numbers(300) settings output_format_arrow_low_cardinality_as_dictionary=1, engine_file_truncate_on_insert=1; select * from file(02293_data.arrow) settings max_threads=1; diff --git a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh index bab0b57f116..40487f16551 100755 --- a/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh +++ b/tests/queries/0_stateless/02383_arrow_dict_special_cases.sh @@ -6,24 +6,24 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +UNIQ_DEST_PATH=$USER_FILES_PATH/test-02383-$RANDOM-$RANDOM +mkdir -p $UNIQ_DEST_PATH -mkdir -p $USER_FILES_PATH/test_02383 -cp $CURDIR/data_arrow/dictionary*.arrow $USER_FILES_PATH/test_02383/ -cp $CURDIR/data_arrow/corrupted.arrow $USER_FILES_PATH/test_02383/ -cp $CURDIR/data_arrow/dict_with_nulls.arrow $USER_FILES_PATH/test_02383/ +cp $CURDIR/data_arrow/dictionary*.arrow $UNIQ_DEST_PATH/ +cp $CURDIR/data_arrow/corrupted.arrow $UNIQ_DEST_PATH/ +cp $CURDIR/data_arrow/dict_with_nulls.arrow $UNIQ_DEST_PATH/ -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary1.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary1.arrow') settings max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary2.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary2.arrow') settings max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dictionary3.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dictionary3.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary1.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary1.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary2.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary2.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dictionary3.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dictionary3.arrow') settings max_threads=1" -$CLICKHOUSE_CLIENT -q "desc file('test_02383/corrupted.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/corrupted.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/corrupted.arrow')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo OK || echo FAIL -$CLICKHOUSE_CLIENT -q "desc file('test_02383/dict_with_nulls.arrow')" -$CLICKHOUSE_CLIENT -q "select * from file('test_02383/dict_with_nulls.arrow') settings max_threads=1" +$CLICKHOUSE_CLIENT -q "desc file('$UNIQ_DEST_PATH/dict_with_nulls.arrow')" +$CLICKHOUSE_CLIENT -q "select * from file('$UNIQ_DEST_PATH/dict_with_nulls.arrow') settings max_threads=1" - -rm -rf $USER_FILES_PATH/test_02383 +rm -rf $UNIQ_DEST_PATH diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 9076d7a8a5f..0b6f91907d4 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -5,7 +5,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $user_files_path/ -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$user_files_path/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000" +UNIQ_DEST_PATH=$user_files_path/test-02455-$RANDOM-$RANDOM +mkdir -p $UNIQ_DEST_PATH +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $UNIQ_DEST_PATH/ + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$UNIQ_DEST_PATH/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$UNIQ_DEST_PATH/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000" + +rm -rf $UNIQ_DEST_PATH From 211cea5e7c99119777d387a6b5331e4703e24510 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Apr 2023 22:50:21 +0200 Subject: [PATCH 230/277] Fix uncaught exception in case of parallel loader for hashed dictionaries Since ThreadPool::wait() rethrows the first exception (if any):
stacktrace 2023.04.09 12:53:33.629333 [ 22361 ] {} BaseDaemon: (version 22.13.1.1, build id: 5FB01DCAAFFF19F0A9A61E253567F90685989D2F) (from thread 23032) Terminate called for uncaught exception: 2023.04.09 12:53:33.630179 [ 23645 ] {} BaseDaemon: 2023.04.09 12:53:33.630213 [ 23645 ] {} BaseDaemon: Stack trace: 0x7f68b00baccc 0x7f68b006bef2 0x7f68b0056472 0x112a42fe 0x1c17f2a3 0x1c17f238 0xbf4bc3b 0x13961c6d 0x138ee529 0x138ed6bc 0x138dd2f0 0x138dd9c6 0x1571d0dd 0x16197c1f 0x161a231e 0x1619fc93 0x161a51b9 0x11151759 0x1115454e 0x7f68b00b8fd4 0x7f68b013966c 2023.04.09 12:53:33.630247 [ 23645 ] {} BaseDaemon: 3. ? @ 0x7f68b00baccc in ? 2023.04.09 12:53:33.630263 [ 23645 ] {} BaseDaemon: 4. gsignal @ 0x7f68b006bef2 in ? 2023.04.09 12:53:33.630273 [ 23645 ] {} BaseDaemon: 5. abort @ 0x7f68b0056472 in ? 2023.04.09 12:53:33.648815 [ 23645 ] {} BaseDaemon: 6. ./.build/./src/Daemon/BaseDaemon.cpp:456: terminate_handler() @ 0x112a42fe in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:33.651484 [ 23645 ] {} BaseDaemon: 7. ./.build/./contrib/llvm-project/libcxxabi/src/cxa_handlers.cpp:61: std::__terminate(void (*)()) @ 0x1c17f2a3 in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:33.654080 [ 23645 ] {} BaseDaemon: 8. ./.build/./contrib/llvm-project/libcxxabi/src/cxa_handlers.cpp:79: std::terminate() @ 0x1c17f238 in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:35.025565 [ 23645 ] {} BaseDaemon: 9. ? @ 0xbf4bc3b in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:36.495557 [ 23645 ] {} BaseDaemon: 10. DB::ParallelDictionaryLoader<(DB::DictionaryKeyType)0, true, true>::~ParallelDictionaryLoader() @ 0x13961c6d in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:37.833142 [ 23645 ] {} BaseDaemon: 11. DB::HashedDictionary<(DB::DictionaryKeyType)0, true, true>::loadData() @ 0x138ee529 in /usr/lib/debug/usr/bin/clickhouse.debug 2023.04.09 12:53:39.124989 [ 23645 ] {} BaseDaemon: 12. DB::HashedDictionary<(DB::DictionaryKeyType)0, true, true>::HashedDictionary(DB::StorageID const&, DB::DictionaryStructure const&, std::__1::shared_ptr, DB::HashedDictionaryStorageConfiguration const&, std::__1::shared_ptr) @ 0x138ed6bc in /usr/lib/debug/usr/bin/clickhouse.debug
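
The failure mode fixed here can be reproduced with a minimal standalone sketch (the `Pool` and `Loader` types below are illustrative stand-ins, not the actual ClickHouse classes):

```cpp
#include <exception>
#include <iostream>
#include <stdexcept>

/// Illustrative stand-in for a thread pool whose wait() rethrows
/// the first exception thrown by any scheduled job.
struct Pool
{
    void wait() { throw std::runtime_error("job failed"); }
};

struct Loader
{
    Pool pool;

    ~Loader()
    {
        /// Without this try/catch the exception escapes the destructor; if the
        /// destructor runs during stack unwinding (as in the report above),
        /// that calls std::terminate.
        try
        {
            pool.wait();
        }
        catch (...)
        {
            std::cerr << "exception during parallel load, suppressed in destructor\n";
        }
    }
};

int main()
{
    try
    {
        Loader loader;
        throw std::runtime_error("primary error"); /// unwinding destroys 'loader'
    }
    catch (const std::exception & e)
    {
        std::cout << "handled: " << e.what() << '\n'; /// reached only if the destructor does not throw
    }
}
```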
Signed-off-by: Azat Khuzhin --- src/Dictionaries/HashedDictionary.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 0e5d18363e9..5cfac20e572 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -114,9 +114,18 @@ public: ~ParallelDictionaryLoader() { - for (auto & queue : shards_queues) - queue->clearAndFinish(); - pool.wait(); + try + { + for (auto & queue : shards_queues) + queue->clearAndFinish(); + + /// NOTE: It is OK to not pass the exception next, since on success finish() should be called which will call wait() + pool.wait(); + } + catch (...) + { + tryLogCurrentException(dictionary.log, "Exception had been thrown during parallel load of the dictionary"); + } } private: From 1187534545451970bd10eb10a986d5e33a8f0b01 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 9 Apr 2023 21:26:39 +0000 Subject: [PATCH 231/277] Simpler way to resize pipeline --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 665630c3559..1b195d2eb47 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -797,7 +797,7 @@ Pipe StorageFile::read( /// It happens if globs in file(path, ...) expands to empty set i.e. no files to process if (num_streams > 0 && num_streams < max_num_streams) { - pipe.addTransform(std::make_shared(pipe.getHeader(), num_streams, max_num_streams)); + pipe.resize(max_num_streams); } return pipe; } From 98cabda66d32069bc1e143a9d7f85cf6369720c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 10 Apr 2023 00:15:15 +0200 Subject: [PATCH 232/277] Update CCTZ --- contrib/cctz | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz b/contrib/cctz index 7c78edd52b4..5e05432420f 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d +Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2 From c89eb29b0f203e0262891080464293bf153b7e1d Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 9 Apr 2023 19:24:42 -0300 Subject: [PATCH 233/277] Update deltasumtimestamp.md --- .../aggregate-functions/reference/deltasumtimestamp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index 7be933d67d7..c059b2c1e63 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -7,7 +7,7 @@ sidebar_position: 141 Суммирует разницу между поÑледовательными Ñтроками. ЕÑли разница отрицательна — она будет проигнорирована. -Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), упорÑдоченных по некоторому временному бакету ÑоглаÑно timestamp, например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, невозможно объединить их в "правом" порÑдке. Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¾Ñ‚Ñлеживает `timestamp` наблюдаемых значений, поÑтому возможно правильно упорÑдочить ÑоÑтоÑÐ½Ð¸Ñ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑниÑ. 
+Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), хранÑщих данные, упорÑдоченные по некоторому временному бакету ÑоглаÑно timestamp (time-series), например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, их невозможно объединить их в правильном порÑдке без Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñходного, неокругленного Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ timestamp. Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ `deltaSumTimestamp` отÑлеживает иÑходные `timestamp` наблюдаемых значений, поÑтому Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (ÑоÑтоÑниÑ) функции правильно вычиÑлÑÑŽÑ‚ÑÑ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑÐ½Ð¸Ñ ÐºÑƒÑков. Чтобы вычиÑлить разницу между упорÑдоченными поÑледовательными Ñтроками, вы можете иÑпользовать функцию [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) вмеÑто функции `deltaSumTimestamp`. From a4183204f9a6b38e40a8523e3c694e0fc0226756 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 9 Apr 2023 19:27:13 -0300 Subject: [PATCH 234/277] Update deltasumtimestamp.md --- .../aggregate-functions/reference/deltasumtimestamp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index c059b2c1e63..8b022241dac 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -7,7 +7,7 @@ sidebar_position: 141 Суммирует разницу между поÑледовательными Ñтроками. ЕÑли разница отрицательна — она будет проигнорирована. -Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), хранÑщих данные, упорÑдоченные по некоторому временному бакету ÑоглаÑно timestamp (time-series), например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, их невозможно объединить их в правильном порÑдке без Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñходного, неокругленного Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ timestamp. Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ `deltaSumTimestamp` отÑлеживает иÑходные `timestamp` наблюдаемых значений, поÑтому Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (ÑоÑтоÑниÑ) функции правильно вычиÑлÑÑŽÑ‚ÑÑ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑÐ½Ð¸Ñ ÐºÑƒÑков. +Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), хранÑщих данные, упорÑдоченные по некоторому временному бакету ÑоглаÑно timestamp (time-series), например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, их невозможно объединить в правильном порÑдке без Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñходного, неокругленного Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ timestamp. Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ `deltaSumTimestamp` отÑлеживает иÑходные `timestamp` наблюдаемых значений, поÑтому Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (ÑоÑтоÑниÑ) функции правильно вычиÑлÑÑŽÑ‚ÑÑ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑÐ½Ð¸Ñ ÐºÑƒÑков. Чтобы вычиÑлить разницу между упорÑдоченными поÑледовательными Ñтроками, вы можете иÑпользовать функцию [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) вмеÑто функции `deltaSumTimestamp`. 
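
The behavior this documentation change describes can be illustrated with a small self-contained query (the inline values below are made up for the example):

``` sql
SELECT deltaSumTimestamp(value, ts)
FROM values('ts DateTime, value UInt32',
    ('2023-01-01 00:00:00', 1),
    ('2023-01-01 00:00:01', 3),
    ('2023-01-01 00:00:02', 2));
-- Returns 2: the step from 1 to 3 is added, the negative step from 3 to 2 is ignored.
```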
From 028a21658d98e6aad5051de2f2f9b1a94461fbfd Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 9 Apr 2023 19:34:10 -0300 Subject: [PATCH 235/277] Update deltasumtimestamp.md --- .../aggregate-functions/reference/deltasumtimestamp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index e08e69b7cf6..afcf2a48c23 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -6,7 +6,7 @@ title: deltaSumTimestamp Adds the difference between consecutive rows. If the difference is negative, it is ignored. -This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging. +This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the correct order, without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it's seen, so the values (states) of the function are correctly computed during merging of parts. To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function. From 36871fad3498c807b63ab54c2bd9643bef3710b8 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sun, 9 Apr 2023 19:38:08 -0300 Subject: [PATCH 236/277] Update deltasumtimestamp.md --- .../aggregate-functions/reference/deltasumtimestamp.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index 8b022241dac..50434419651 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -7,7 +7,7 @@ sidebar_position: 141 Суммирует разницу между поÑледовательными Ñтроками. ЕÑли разница отрицательна — она будет проигнорирована. -Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), хранÑщих данные, упорÑдоченные по некоторому временному бакету ÑоглаÑно timestamp (time-series), например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, их невозможно объединить в правильном порÑдке без Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñходного, неокругленного Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ timestamp. 
Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ `deltaSumTimestamp` отÑлеживает иÑходные `timestamp` наблюдаемых значений, поÑтому Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (ÑоÑтоÑниÑ) функции правильно вычиÑлÑÑŽÑ‚ÑÑ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑÐ½Ð¸Ñ ÐºÑƒÑков. +Эта Ñ„ÑƒÐ½ÐºÑ†Ð¸Ñ Ð¿Ñ€ÐµÐ´Ð½Ð°Ð·Ð½Ð°Ñ‡ÐµÐ½Ð° в первую очередь Ð´Ð»Ñ [материализованных предÑтавлений](../../../sql-reference/statements/create/view.md#materialized), хранÑщих данные, упорÑдоченные по некоторому округленному временному интервалу, ÑоглаÑно timestamp, например, по бакету `toStartOfMinute`. ПоÑкольку Ñтроки в таком материализованном предÑтавлении будут иметь одинаковый timestamp, их невозможно объединить в правильном порÑдке без Ñ…Ñ€Ð°Ð½ÐµÐ½Ð¸Ñ Ð¸Ñходного, неокругленного Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ timestamp. Ð¤ÑƒÐ½ÐºÑ†Ð¸Ñ `deltaSumTimestamp` отÑлеживает иÑходные `timestamp` наблюдаемых значений, поÑтому Ð·Ð½Ð°Ñ‡ÐµÐ½Ð¸Ñ (ÑоÑтоÑниÑ) функции правильно вычиÑлÑÑŽÑ‚ÑÑ Ð²Ð¾ Ð²Ñ€ÐµÐ¼Ñ ÑлиÑÐ½Ð¸Ñ ÐºÑƒÑков. Чтобы вычиÑлить разницу между упорÑдоченными поÑледовательными Ñтроками, вы можете иÑпользовать функцию [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) вмеÑто функции `deltaSumTimestamp`. From c06a5ab85f9fa9af39d33c836f51cd8c353a83a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 10 Apr 2023 01:04:23 +0200 Subject: [PATCH 237/277] Remove slow test from debug builds --- tests/queries/0_stateless/02703_jit_external_aggregation.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02703_jit_external_aggregation.sh b/tests/queries/0_stateless/02703_jit_external_aggregation.sh index d1af5b8b8bc..2d1dda45de0 100755 --- a/tests/queries/0_stateless/02703_jit_external_aggregation.sh +++ b/tests/queries/0_stateless/02703_jit_external_aggregation.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-asan, no-msan, no-tsan, no-ubsan +# Tags: long, no-asan, no-msan, no-tsan, no-ubsan, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From e1fa279c9875c35fe655adb460e4836dabf6a782 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 9 Apr 2023 23:40:32 +0000 Subject: [PATCH 238/277] Fix tests for flaky check --- .../0_stateless/02211_jsonl_format_extension.sql | 2 +- .../02455_one_row_from_csv_memory_usage.sh | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02211_jsonl_format_extension.sql b/tests/queries/0_stateless/02211_jsonl_format_extension.sql index a95b19f4e3b..61cc2a408fa 100644 --- a/tests/queries/0_stateless/02211_jsonl_format_extension.sql +++ b/tests/queries/0_stateless/02211_jsonl_format_extension.sql @@ -1,3 +1,3 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel insert into table function file('data.jsonl', 'JSONEachRow', 'x UInt32') select * from numbers(10) SETTINGS engine_file_truncate_on_insert=1; select * from file('data.jsonl') order by x; diff --git a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh index 0b6f91907d4..1b0101e4f06 100755 --- a/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh +++ b/tests/queries/0_stateless/02455_one_row_from_csv_memory_usage.sh @@ -1,16 +1,15 @@ #!/usr/bin/env bash +# Tags: no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -user_files_path=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}') -UNIQ_DEST_PATH=$user_files_path/test-02455-$RANDOM-$RANDOM -mkdir -p $UNIQ_DEST_PATH -cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $UNIQ_DEST_PATH/ +cp "$CUR_DIR"/data_csv/10m_rows.csv.xz $USER_FILES_PATH/ -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$UNIQ_DEST_PATH/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('$UNIQ_DEST_PATH/10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=1000000000" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM file('10m_rows.csv.xz' , 'CSVWithNames') order by identifier, number, name, surname, birthday LIMIT 1 settings max_memory_usage=100000000" -rm -rf $UNIQ_DEST_PATH +rm $USER_FILES_PATH/10m_rows.csv.xz From 5cb00e13c3bff6721d1ea5dc26f5e39ba904f4a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 10 Apr 2023 02:31:49 +0200 Subject: [PATCH 239/277] Add a test --- ...12_bool_better_exception_message.reference | 5 ++++ .../02712_bool_better_exception_message.sh | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/02712_bool_better_exception_message.reference create mode 100755 tests/queries/0_stateless/02712_bool_better_exception_message.sh diff --git a/tests/queries/0_stateless/02712_bool_better_exception_message.reference b/tests/queries/0_stateless/02712_bool_better_exception_message.reference new file mode 100644 index 00000000000..aba60fe1061 --- /dev/null +++ b/tests/queries/0_stateless/02712_bool_better_exception_message.reference @@ -0,0 +1,5 @@ +true +false +1 +1 +1 diff --git a/tests/queries/0_stateless/02712_bool_better_exception_message.sh b/tests/queries/0_stateless/02712_bool_better_exception_message.sh new file mode 100755 index 00000000000..ce6a4f4874b --- /dev/null +++ b/tests/queries/0_stateless/02712_bool_better_exception_message.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh +SCHEMADIR=$CURDIR/format_schemas + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'w' character" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": wtf}'); +END + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "expected 'false'" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": ftw}'); +END + +$CLICKHOUSE_LOCAL <&1 | rg -Fc "'{' character" + SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": {}}'); +END From 1f6b9809dd7bda0a0fa83be5f42e24f4b205128a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 10 Apr 2023 02:32:04 +0200 Subject: [PATCH 240/277] Better exception messages for unparsed Bool --- src/DataTypes/Serializations/SerializationBool.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 81ad0ec46b1..41b5bf806e5 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -238,12 +238,15 @@ void SerializationBool::deserializeTextJSON(IColumn &column, ReadBuffer &istr, c ColumnUInt8 * col = checkAndGetDeserializeColumnType(column); bool value = false; - if (*istr.position() == 't' || *istr.position() == 'f') + char first_char = *istr.position(); + if (first_char == 't' || first_char == 'f') readBoolTextWord(value, istr); - else if (*istr.position() == '1' || *istr.position() == '0') + else if (first_char == '1' || first_char == '0') readBoolText(value, istr); else - throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Invalid boolean value, should be true/false, 1/0."); + throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, + "Invalid boolean value, should be true/false, 1/0, but it starts with the '{}' character.", first_char); + col->insert(value); } From a24cc9580cea69c9fc2e70db030fdfebd3d2a5c2 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Mon, 10 Apr 2023 03:11:10 +0200 Subject: [PATCH 241/277] Update syntax.md --- docs/en/sql-reference/syntax.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 63c5042f9e8..00d11ccff40 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. 
When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. @@ -45,7 +45,7 @@ You can check whether a data type name is case-sensitive in the [system.data_typ In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. -Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. +Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`. ## Identifiers @@ -54,7 +54,7 @@ Identifiers are: - Cluster, database, table, partition, and column names. - Functions. - Data types. -- [Expression aliases](#syntax-expression_aliases). +- [Expression aliases](#expression_aliases). Identifiers can be quoted or non-quoted. The latter is preferred. @@ -149,7 +149,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - SET param_a = 13; SET param_b = 'str'; SET param_c = '2022-08-04 18:30:53'; -SET param_d = {'10': [11, 12], '13': [14, 15]}'; +SET param_d = {'10': [11, 12], '13': [14, 15]}; SELECT {a: UInt32}, @@ -166,7 +166,7 @@ Result: If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`: -```sql +```bash clickhouse-client --param_message='hello' --query="SELECT {message: String}" ``` @@ -190,7 +190,7 @@ Query parameters are not general text substitutions which can be used in arbitra ## Functions Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`. -There are regular and aggregate functions (see the section “Aggregate functionsâ€). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric†functions, and the arguments in the first list are called “parametersâ€. The syntax of aggregate functions without parameters is the same as for regular functions. +There are regular and aggregate functions (see the section [Aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md)). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric†functions, and the arguments in the first list are called “parametersâ€. The syntax of aggregate functions without parameters is the same as for regular functions. 
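+
+For example, `quantile(0.9)(x)` passes the level `0.9` as a parameter and the column `x` as an argument:
+
+``` sql
+SELECT quantile(0.9)(number) FROM numbers(100);
+```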
## Operators @@ -199,7 +199,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult ## Data Types and Database Table Engines -Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,†“Table engines,†and “CREATEâ€. +Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections [Data types](/docs/en/sql-reference/data-types/index.md), [Table engines](/docs/en/engines/table-engines/index.md), and [CREATE](/docs/en/sql-reference/statements/create/index.md). ## Expression Aliases @@ -211,17 +211,17 @@ expr AS alias - `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. - For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. + For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. + In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. - `expr` — Any expression supported by ClickHouse. - For example, `SELECT column_name * 2 AS double FROM some_table`. + For example, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax. +- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#identifiers) syntax. - For example, `SELECT "table t".column_name FROM table_name AS "table t"`. + For example, `SELECT "table t".column_name FROM table_name AS "table t"`. ### Notes on Usage @@ -258,7 +258,7 @@ In this example, we declared table `t` with column `b`. Then, when selecting dat ## Asterisk -In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECTâ€. +In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk). ## Expressions From 8eaf20e1294a67344260e5d967a4b02db9ef7dd7 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Mon, 10 Apr 2023 03:26:43 +0200 Subject: [PATCH 242/277] Update syntax.md --- docs/en/sql-reference/syntax.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 00d11ccff40..ea2df235c1a 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. 
When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. @@ -108,7 +108,7 @@ Depending on the data format (input or output), `NULL` may have a different repr There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation. -In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`. +In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#is-null) and [IS NOT NULL](../sql-reference/operators/index.md#is-not-null) operators and the related functions `isNull` and `isNotNull`. ### Heredoc @@ -213,7 +213,7 @@ expr AS alias For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. + In the [CAST](./functions/type-conversion-functions.md#castx-t) function, the `AS` keyword has another meaning. See the description of the function. - `expr` — Any expression supported by ClickHouse. @@ -254,7 +254,7 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`. +In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. 
You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer-column-name-to-alias) to `1`.
 
 ## Asterisk
 
-In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”.
+In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk).
 
 ## Expressions
 
From 8e90b7d446521af0a40ec3c0096f62e7e9395321 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 10 Apr 2023 03:31:43 +0200
Subject: [PATCH 243/277] Fixup

---
 .../0_stateless/02712_bool_better_exception_message.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02712_bool_better_exception_message.sh b/tests/queries/0_stateless/02712_bool_better_exception_message.sh
index ce6a4f4874b..1da70d7025b 100755
--- a/tests/queries/0_stateless/02712_bool_better_exception_message.sh
+++ b/tests/queries/0_stateless/02712_bool_better_exception_message.sh
@@ -4,7 +4,7 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
-SCHEMADIR=$CURDIR/format_schemas
+
 
 $CLICKHOUSE_LOCAL <<END
     SELECT * FROM format(JSONEachRow, 'x Bool', '{"x": true}');

From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Mon, 10 Apr 2023 03:39:13 +0200
Subject: [PATCH 244/277] Don't use type conversion with String query parameters

---
 src/Interpreters/ReplaceQueryParameterVisitor.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
index fad9d4bbfb2..71f28f9fb2c 100644
--- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp
+++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
@@ -102,7 +102,10 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast)
     else
         literal = temp_column[0];
 
-    ast = addTypeConversionToAST(std::make_shared<ASTLiteral>(literal), type_name);
+    if (typeid_cast<const DataTypeString *>(&data_type))
+        ast = std::make_shared<ASTLiteral>(literal);
+    else
+        ast = addTypeConversionToAST(std::make_shared<ASTLiteral>(literal), type_name);
 
     /// Keep the original alias.
     ast->setAlias(alias);
 
From e71c7fe7449f18591a6a3bf375495fa792939ee8 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Mon, 10 Apr 2023 03:48:58 +0200
Subject: [PATCH 245/277] Fix

---
 src/Interpreters/ReplaceQueryParameterVisitor.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
index 71f28f9fb2c..c92161c72c3 100644
--- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp
+++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include <DataTypes/DataTypeString.h>
 #include
 #include
 #include
@@ -102,7 +103,7 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast)
     else
         literal = temp_column[0];
 
-    if (typeid_cast<const DataTypeString *>(&data_type))
+    if (typeid_cast<const DataTypeString *>(data_type.get()))
         ast = std::make_shared<ASTLiteral>(literal);
     else
         ast = addTypeConversionToAST(std::make_shared<ASTLiteral>(literal), type_name);
From 345b33dc1fba2937fc758ddd70e0e5f7cd1af6d1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 10 Apr 2023 04:15:13 +0200
Subject: [PATCH 246/277] Slightly optimize Install

---
 programs/install/Install.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index b142159fbdf..d83e189f7ef 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -375,15 +375,22 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
 
         try
         {
-            ReadBufferFromFile in(binary_self_path.string());
-            WriteBufferFromFile out(main_bin_tmp_path.string());
-            copyData(in, out);
-            out.sync();
+            String source = binary_self_path.string();
+            String destination = main_bin_tmp_path.string();
 
-            if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
+            /// Try to make a hard link first, as an optimization.
+            /// It is possible if the source and the destination are on the same filesystems.
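+            /// If link() fails (for example, with EXDEV when the two paths are on
+            /// different mounts, or EEXIST when the destination already exists),
+            /// fall back to the full byte-by-byte copy below.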
+ if (0 != link(source.c_str(), destination.c_str())) + { + ReadBufferFromFile in(binary_self_path.string()); + WriteBufferFromFile out(main_bin_tmp_path.string()); + copyData(in, out); + out.sync(); + out.finalize(); + } + + if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH)) throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR); - - out.finalize(); } catch (const Exception & e) { From 6738ab66600d9b6857af07b8d98bf2f0d5277348 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 10 Apr 2023 04:22:17 +0200 Subject: [PATCH 247/277] Fix TSan report in Kerberos --- contrib/krb5 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/krb5 b/contrib/krb5 index 9453aec0d50..b56ce6ba690 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02 +Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 From 8524f6958a82e772919e02f2cb6647e8286d3a02 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Mon, 10 Apr 2023 09:09:57 +0200 Subject: [PATCH 248/277] Update src/Interpreters/ReplaceQueryParameterVisitor.cpp Co-authored-by: Alexey Milovidov --- src/Interpreters/ReplaceQueryParameterVisitor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 7ed56dce9d0..c92161c72c3 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -103,6 +103,9 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) else literal = temp_column[0]; + /// If it's a String, substitute it in the form of a string literal without CAST + /// to enable substitutions in simple queries that don't support expressions + /// (such as CREATE USER). 
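+    /// For example, assuming a parameter set with `SET param_name = 'john'`, a query
+    /// such as `CREATE USER {name:String}` now receives the plain literal 'john'
+    /// instead of `CAST('john', 'String')`, which such statements cannot parse.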
if (typeid_cast(data_type.get())) ast = std::make_shared(literal); else From 000c2b5b9128ce9c2c08d530aa0b14e5f91ed7a6 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Mon, 10 Apr 2023 13:02:31 +0300 Subject: [PATCH 249/277] Correct FixupACL for auth scheme --- src/Coordination/KeeperStorage.cpp | 71 +++++++++++++++++++++--------- src/Coordination/KeeperStorage.h | 4 ++ 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 41a6af54204..6b2696034f1 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -61,16 +61,10 @@ String getSHA1(const String & userdata) return String{digest_id.begin(), digest_id.end()}; } -String generateDigest(const String & userdata) -{ - std::vector user_password; - boost::split(user_password, userdata, [](char character) { return character == ':'; }); - return user_password[0] + ":" + base64Encode(getSHA1(userdata)); -} - bool fixupACL( const std::vector & request_acls, - const std::vector & current_ids, + int64_t session_id, + const KeeperStorage::UncommittedState & uncommitted_state, std::vector & result_acls) { if (request_acls.empty()) @@ -81,14 +75,18 @@ bool fixupACL( { if (request_acl.scheme == "auth") { - for (const auto & current_id : current_ids) - { - valid_found = true; - Coordination::ACL new_acl = request_acl; - new_acl.scheme = current_id.scheme; - new_acl.id = current_id.id; - result_acls.push_back(new_acl); - } + uncommitted_state.forEachAuthInSession( + session_id, + [&](const KeeperStorage::AuthID & auth_id) + { + valid_found = true; + Coordination::ACL new_acl = request_acl; + + new_acl.scheme = auth_id.scheme; + new_acl.id = auth_id.id; + + result_acls.push_back(new_acl); + }); } else if (request_acl.scheme == "world" && request_acl.id == "anyone") { @@ -564,6 +562,32 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons return storage.acl_map.convertNumber(node_it->value.acl_id); } +void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, std::function func) const +{ + const auto call_for_each_auth = [&func](const auto & auth_ids) + { + for (const auto & auth : auth_ids) + { + using TAuth = std::remove_reference_t; + + const AuthID * auth_ptr = nullptr; + if constexpr (std::is_pointer_v) + auth_ptr = auth; + else + auth_ptr = &auth; + + func(*auth_ptr); + } + }; + + // for committed + if (storage.session_and_auth.contains(session_id)) + call_for_each_auth(storage.session_and_auth.at(session_id)); + // for uncommitted + if (session_and_auth.contains(session_id)) + call_for_each_auth(session_and_auth.at(session_id)); +} + namespace { @@ -927,7 +951,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) + if (!fixupACL(request.acls, session_id, storage.uncommitted_state, node_acls)) return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; if (request.is_ephemeral) @@ -1533,10 +1557,8 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; - auto & session_auth_ids = storage.session_and_auth[session_id]; Coordination::ACLs node_acls; - - if (!fixupACL(request.acls, session_auth_ids, node_acls)) + if (!fixupACL(request.acls, session_id, 
                   uncommitted_state, node_acls))
         return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}};
 
     std::vector<KeeperStorage::Delta> new_deltas
@@ -1840,7 +1862,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc
         return {KeeperStorage::Delta{zxid, Coordination::Error::ZAUTHFAILED}};
 
     std::vector<KeeperStorage::Delta> new_deltas;
-    auto auth_digest = generateDigest(auth_request.data);
+    auto auth_digest = KeeperStorage::generateDigest(auth_request.data);
     if (auth_digest == storage.superdigest)
     {
         KeeperStorage::AuthID auth{"super", ""};
@@ -2420,5 +2442,12 @@ void KeeperStorage::recalculateStats()
     container.recalculateDataSize();
 }
 
+String KeeperStorage::generateDigest(const String & userdata)
+{
+    std::vector<String> user_password;
+    boost::split(user_password, userdata, [](char character) { return character == ':'; });
+    return user_password[0] + ":" + base64Encode(getSHA1(userdata));
+}
+
 }
 
diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h
index be528072df4..cfacdfc84de 100644
--- a/src/Coordination/KeeperStorage.h
+++ b/src/Coordination/KeeperStorage.h
@@ -105,6 +105,8 @@ public:
         return first.value == second.value;
     }
 
+    static String generateDigest(const String & userdata);
+
     struct RequestForSession
     {
         int64_t session_id;
@@ -263,6 +265,8 @@ public:
             return check_auth(auth_it->second);
         }
 
+        void forEachAuthInSession(int64_t session_id, std::function<void(const AuthID &)> func) const;
+
         std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const;
 
         std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;

From e8bf96d126fe08c7264bcec7effc8410aa1284a8 Mon Sep 17 00:00:00 2001
From: Aleksei Filatov
Date: Mon, 10 Apr 2023 13:02:43 +0300
Subject: [PATCH 250/277] Add unit tests

---
 src/Coordination/tests/gtest_coordination.cpp | 107 ++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp
index 895d563327e..68e68ca1fa7 100644
--- a/src/Coordination/tests/gtest_coordination.cpp
+++ b/src/Coordination/tests/gtest_coordination.cpp
@@ -1579,6 +1579,113 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove)
 }
 
 
+TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted)
+{
+    using namespace Coordination;
+    using namespace DB;
+
+    ChangelogDirTest snapshots("./snapshots");
+    CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
+    ResponsesQueue queue(std::numeric_limits<size_t>::max());
+    SnapshotsQueue snapshots_queue{1};
+
+    auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr);
+    state_machine->init();
+
+    String user_auth_data = "test_user:test_password";
+    String digest = KeeperStorage::generateDigest(user_auth_data);
+
+    std::shared_ptr<ZooKeeperAuthRequest> auth_req = std::make_shared<ZooKeeperAuthRequest>();
+    auth_req->scheme = "digest";
+    auth_req->data = user_auth_data;
+
+    // Add auth data to the session
+    auto auth_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), auth_req);
+    state_machine->pre_commit(1, auth_entry->get_buf());
+
+    // Create a node with 'auth' scheme for ACL
+    String node_path = "/hello";
+    std::shared_ptr<ZooKeeperCreateRequest> create_req = std::make_shared<ZooKeeperCreateRequest>();
+    create_req->path = node_path;
+    // When 'auth' scheme is used the creator must have been authenticated by the server (for example, using 'digest' scheme) before it can
+    // create nodes with this ACL.
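+    // Hedged sketch of the behaviour under test (illustration only, not part of the
+    // request API): fixupACL() now expands each 'auth' entry into one entry per identity
+    // authenticated in the session, including identities that are only precommitted, e.g.
+    //     {.permissions = 31, .scheme = "auth",   .id = ""}
+    // becomes
+    //     {.permissions = 31, .scheme = "digest", .id = digest}
+    // where `digest` is the AuthID registered by the pending auth request above.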
+    create_req->acls = {{.permissions = 31, .scheme = "auth", .id = ""}};
+    auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req);
+    state_machine->pre_commit(2, create_entry->get_buf());
+
+    const auto & uncommitted_state = state_machine->getStorage().uncommitted_state;
+    ASSERT_TRUE(uncommitted_state.nodes.contains(node_path));
+
+    // commit log entries
+    state_machine->commit(1, auth_entry->get_buf());
+    state_machine->commit(2, create_entry->get_buf());
+
+    auto node = uncommitted_state.getNode(node_path);
+    ASSERT_NE(node, nullptr);
+    auto acls = uncommitted_state.getACLs(node_path);
+    ASSERT_EQ(acls.size(), 1);
+    EXPECT_EQ(acls[0].scheme, "digest");
+    EXPECT_EQ(acls[0].id, digest);
+    EXPECT_EQ(acls[0].permissions, 31);
+}
+
+TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted)
+{
+    using namespace Coordination;
+    using namespace DB;
+
+    ChangelogDirTest snapshots("./snapshots");
+    CoordinationSettingsPtr settings = std::make_shared<CoordinationSettings>();
+    ResponsesQueue queue(std::numeric_limits<size_t>::max());
+    SnapshotsQueue snapshots_queue{1};
+
+    auto state_machine = std::make_shared<KeeperStateMachine>(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr);
+    state_machine->init();
+
+    String user_auth_data = "test_user:test_password";
+    String digest = KeeperStorage::generateDigest(user_auth_data);
+
+    std::shared_ptr<ZooKeeperAuthRequest> auth_req = std::make_shared<ZooKeeperAuthRequest>();
+    auth_req->scheme = "digest";
+    auth_req->data = user_auth_data;
+
+    // Add auth data to the session
+    auto auth_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), auth_req);
+    state_machine->pre_commit(1, auth_entry->get_buf());
+
+    // Create a node
+    String node_path = "/hello";
+    std::shared_ptr<ZooKeeperCreateRequest> create_req = std::make_shared<ZooKeeperCreateRequest>();
+    create_req->path = node_path;
+    auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req);
+    state_machine->pre_commit(2, create_entry->get_buf());
+
+    // Set ACL with 'auth' scheme for ACL
+    std::shared_ptr<ZooKeeperSetACLRequest> set_acl_req = std::make_shared<ZooKeeperSetACLRequest>();
+    set_acl_req->path = node_path;
+    // When 'auth' scheme is used the creator must have been authenticated by the server (for example, using 'digest' scheme) before it can
+    // set this ACL.
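+    // For reference, `digest` above is what KeeperStorage::generateDigest() produces, i.e.
+    // (a sketch mirroring this patch; getSHA1() is internal to KeeperStorage.cpp and not
+    // callable from the test):
+    //     String expected = "test_user:" + base64Encode(getSHA1("test_user:test_password"));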
+    set_acl_req->acls = {{.permissions = 31, .scheme = "auth", .id = ""}};
+    auto set_acl_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), set_acl_req);
+    state_machine->pre_commit(3, set_acl_entry->get_buf());
+
+    // commit all entries
+    state_machine->commit(1, auth_entry->get_buf());
+    state_machine->commit(2, create_entry->get_buf());
+    state_machine->commit(3, set_acl_entry->get_buf());
+
+    const auto & uncommitted_state = state_machine->getStorage().uncommitted_state;
+    auto node = uncommitted_state.getNode(node_path);
+
+    ASSERT_NE(node, nullptr);
+    auto acls = uncommitted_state.getACLs(node_path);
+    ASSERT_EQ(acls.size(), 1);
+    EXPECT_EQ(acls[0].scheme, "digest");
+    EXPECT_EQ(acls[0].id, digest);
+    EXPECT_EQ(acls[0].permissions, 31);
+}
+
+
 TEST_P(CoordinationTest, TestRotateIntervalChanges)
 {
     using namespace Coordination;

From d564fe4e6446f503250d02f806c1f3eab30cad9c Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 10 Apr 2023 10:54:37 +0000
Subject: [PATCH 251/277] Fixing build

---
 src/Storages/System/StorageSystemZooKeeper.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp
index c96c8ec60ae..e21bd887ecb 100644
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@@ -356,7 +356,7 @@ static void extractPathImpl(const ActionsDAG::Node & node, Paths & res, ContextP
     if (!isPathNode(node.children.at(0)))
         return;
 
-    auto value = node.children.at(1);
+    const auto * value = node.children.at(1);
     if (!value->column)
         return;
@@ -504,7 +504,7 @@ void ReadFromSystemZooKeeper::fillData(MutableColumns & res_columns) const
 }
 
 ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(const Block & header, SelectQueryInfo & query_info, ContextPtr context_)
-    : SourceStepWithFilter({.header = std::move(header)})
+    : SourceStepWithFilter({.header = header})
     , storage_limits(query_info.storage_limits)
     , context(std::move(context_))
 {

From 450e8207285b411f614564011a2845b9e22aaa6b Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Mon, 10 Apr 2023 16:18:48 +0200
Subject: [PATCH 252/277] Docs: Add SETTINGS to executable table function

---
 docs/en/sql-reference/table-functions/executable.md | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/en/sql-reference/table-functions/executable.md b/docs/en/sql-reference/table-functions/executable.md
index 22c74eb8cfa..5a24c3ab11d 100644
--- a/docs/en/sql-reference/table-functions/executable.md
+++ b/docs/en/sql-reference/table-functions/executable.md
@@ -20,7 +20,7 @@ A key advantage between ordinary UDF functions and the `executable` table functi
 The `executable` table function requires three parameters and accepts an optional list of input queries:
 
 ```sql
-executable(script_name, format, structure, [input_query...])
+executable(script_name, format, structure, [input_query...] [,SETTINGS ...])
 ```
 
 - `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
@@ -83,6 +83,15 @@ The response looks like:
 └────┴────────────┘
 ```
 
+## Settings
+
+- `send_chunk_header` — controls whether to send a row count before sending a chunk of data to process. Default value is `false`.
+- `pool_size` — Size of the pool. If 0 is specified as `pool_size`, then there are no pool size restrictions. Default value is `16`.
+- `max_command_execution_time` — Maximum execution time of the executable script command for processing a block of data. Specified in seconds. Default value is `10`.
+- `command_termination_timeout` — The executable script should contain a main read-write loop. After the table function is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is `10`.
+- `command_read_timeout` — timeout for reading data from the command's stdout, in milliseconds. Default value is `10000`.
+- `command_write_timeout` — timeout for writing data to the command's stdin, in milliseconds. Default value is `10000`.
+
 ## Passing Query Results to a Script
 
 Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
 
@@ -94,4 +103,4 @@ SELECT * FROM executable(
     'id UInt64, sentiment Float32',
     (SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
 );
-```
\ No newline at end of file
+```

From d50c6a9e1b546a109c044cc6a2af3d445f75f519 Mon Sep 17 00:00:00 2001
From: Dale Mcdiarmid
Date: Mon, 10 Apr 2023 15:23:00 +0100
Subject: [PATCH 253/277] update blog references

---
 docs/en/engines/database-engines/postgresql.md | 1 +
 docs/en/engines/table-engines/integrations/postgresql.md | 2 ++
 .../mergetree-family/aggregatingmergetree.md | 4 ++++
 .../table-engines/mergetree-family/invertedindexes.md | 4 ++++
 .../table-engines/mergetree-family/summingmergetree.md | 4 ++++
 docs/en/engines/table-engines/special/keepermap.md | 4 ++++
 docs/en/getting-started/example-datasets/github.md | 8 +++++---
 docs/en/operations/opentelemetry.md | 4 ++++
 docs/en/operations/query-cache.md | 4 ++++
 docs/en/operations/utilities/clickhouse-local.md | 4 ++++
 docs/en/sql-reference/aggregate-functions/combinators.md | 5 +++++
 docs/en/sql-reference/data-types/aggregatefunction.md | 5 +++++
 docs/en/sql-reference/data-types/map.md | 5 +++++
 docs/en/sql-reference/statements/alter/delete.md | 4 ++++
 docs/en/sql-reference/statements/alter/index.md | 4 ++++
 docs/en/sql-reference/statements/alter/update.md | 5 +++++
 docs/en/sql-reference/statements/create/view.md | 1 +
 docs/en/sql-reference/statements/delete.md | 4 ++++
 docs/en/sql-reference/statements/select/join.md | 4 ++++
 docs/en/sql-reference/table-functions/postgresql.md | 2 ++
 20 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md
index 939995a61c5..95d6a6192e3 100644
--- a/docs/en/engines/database-engines/postgresql.md
+++ b/docs/en/engines/database-engines/postgresql.md
@@ -140,3 +140,4 @@ DESCRIBE TABLE test_database.test_table;
 ## Related content
 
 - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
+- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)

diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md
index 2222d1fc016..2fcc0b10e78 100644
--- a/docs/en/engines/table-engines/integrations/postgresql.md
+++ b/docs/en/engines/table-engines/integrations/postgresql.md
@@ -177,4 +177,6 @@
CREATE TABLE pg_table_schema_with_dots (a UInt32) - [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql) ## Related content + - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 6591f666244..2b8b43802ea 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -122,3 +122,7 @@ FROM test.mv_visits GROUP BY StartDate ORDER BY StartDate; ``` + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 701615495de..31f5a87a2b6 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -191,3 +191,7 @@ is performance. In practice, users often search for multiple terms at once. For '%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future). ::: + +## Related Content + +- Blog: [Introducing Inverted Indices in ClickHouse](https://clickhouse.com/blog/clickhouse-search-with-inverted-indices) diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index d0078656b5d..d15323bbc88 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -186,3 +186,7 @@ ARRAY JOIN When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`. For nested data structure, you do not need to specify its columns in the tuple of columns for summation. 
+
+## Related Content
+
+- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states)
diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md
index e5c4dea2339..a1c7009b712 100644
--- a/docs/en/engines/table-engines/special/keepermap.md
+++ b/docs/en/engines/table-engines/special/keepermap.md
@@ -112,3 +112,7 @@ If setting `keeper_map_strict_mode` is set to `true`, fetching and updating data
 ```sql
 ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1;
 ```
+
+## Related content
+
+- Blog: [Building a Real-time Analytics App with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine)
diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md
index e18c7dec1a6..02965ed5e33 100644
--- a/docs/en/getting-started/example-datasets/github.md
+++ b/docs/en/getting-started/example-datasets/github.md
@@ -2499,7 +2499,9 @@ LIMIT 20
 
 We welcome exact and improved solutions here.
 
-# Related Content
+## Related Content
 
-- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits)
-- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)
+- Blog: [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits)
+- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)
+- Blog: [Building a Real-time Analytics App with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine)
+- Blog: [A Story of Open-source GitHub Activity using ClickHouse + Grafana](https://clickhouse.com/blog/introduction-to-clickhouse-and-grafana-webinar)
diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md
index 15185f7ae6b..70f64d08ba3 100644
--- a/docs/en/operations/opentelemetry.md
+++ b/docs/en/operations/opentelemetry.md
@@ -61,3 +61,7 @@ FROM system.opentelemetry_span_log
 ```
 
 In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
+
+## Related Content
+
+- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)
diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md
index 7b106909cf0..e9301ef3051 100644
--- a/docs/en/operations/query-cache.md
+++ b/docs/en/operations/query-cache.md
@@ -124,3 +124,7 @@ Finally, entries in the query cache are not shared between users due to security
 row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries
 can be marked accessible by other users (i.e. shared) by supplying setting
 [query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
+ +## Related Content + +- Blog: [Introducing the ClickHouse Query Cache](https://clickhouse.com/blog/introduction-to-the-clickhouse-query-cache-and-design) diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index 6363d9cab27..4439f823240 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -6,6 +6,10 @@ sidebar_label: clickhouse-local # clickhouse-local +## Related Content + +- Blog: [Extracting, Converting, and Querying Data in Local Files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) + ## When to use clickhouse-local vs. ClickHouse `clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without the need for a complex installation process. diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 704e88c6313..a28e58ca880 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -285,3 +285,8 @@ FROM people │ [3,2] │ [11.5,12.949999809265137] │ └────────┴───────────────────────────┘ ``` + + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index d8547f03714..90dd8bd2311 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -63,3 +63,8 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP ## Usage Example See [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine description. 
+ + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index b0659746ba7..ad99bc75f24 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -108,3 +108,8 @@ Result: - [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function - [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + + +## Related content + +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 30ed96c0b9c..21ae6a1e5d1 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -28,3 +28,7 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do - [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) - [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 5d7b92bd34d..cbec9572bb1 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -61,3 +61,7 @@ For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active fo ::: For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting. 
+
+## Related content
+
+- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md
index 92f0f111b92..c0036c060eb 100644
--- a/docs/en/sql-reference/statements/alter/update.md
+++ b/docs/en/sql-reference/statements/alter/update.md
@@ -27,3 +27,8 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do
 - [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
 - [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
 - [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting
+
+
+## Related content
+
+- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 0def42259ab..d7b16a88f6b 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -364,3 +364,4 @@ The window view is useful in the following scenarios:
 ## Related Content
 
 - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
+- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse)
diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md
index 149e7ab371f..d18f94031ae 100644
--- a/docs/en/sql-reference/statements/delete.md
+++ b/docs/en/sql-reference/statements/delete.md
@@ -55,3 +55,7 @@ With the described implementation now we can see what can negatively affect 'DEL
 - Table having a very large number of data parts
 - Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file.
+
+## Related content
+
+- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse)
diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md
index ece60961aaf..62947dcefdd 100644
--- a/docs/en/sql-reference/statements/select/join.md
+++ b/docs/en/sql-reference/statements/select/join.md
@@ -18,6 +18,10 @@ FROM
 Expressions from `ON` clause and columns from `USING` clause are called “join keys”. Unless otherwise stated, join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching “join keys”, which might produce results with many more rows than the source tables.
+## Related Content + +- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Part 1](https://clickhouse.com/blog/clickhouse-fully-supports-joins) + ## Supported Types of JOIN All standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) types are supported: diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 6cd13acaa77..975a04fa0de 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -133,4 +133,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) - [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql) ## Related content + - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) From 79b83c4fd2249daaaa606ab907bd72dc0a2afde8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 8 Apr 2023 06:47:21 +0200 Subject: [PATCH 254/277] Remove superfluous includes of logger_userful.h from headers Signed-off-by: Azat Khuzhin --- .../AggregateFunctionMaxIntersections.h | 1 - .../AggregateFunctionSparkbar.h | 1 - .../AggregateFunctionSumMap.h | 1 - src/BridgeHelper/IBridgeHelper.h | 1 - src/BridgeHelper/XDBCBridgeHelper.h | 1 - src/Client/Connection.cpp | 3 ++- src/Client/Connection.h | 1 - src/Client/LocalConnection.cpp | 1 + src/Client/LocalConnection.h | 1 + src/Common/AsynchronousMetrics.cpp | 1 + src/Common/CacheBase.h | 1 - src/Common/Config/ConfigProcessor.cpp | 1 + src/Common/Config/ConfigProcessor.h | 3 ++- src/Common/Config/YAMLParser.h | 8 +++---- src/Common/Config/configReadClient.cpp | 1 + src/Common/DNSResolver.cpp | 1 + src/Common/DNSResolver.h | 3 ++- src/Common/ErrorHandlers.h | 1 - src/Common/FileChecker.cpp | 6 ++++- src/Common/FileChecker.h | 6 +++-- src/Common/LRUCachePolicy.h | 6 ++--- src/Common/OvercommitTracker.h | 2 +- src/Common/PoolBase.h | 11 ++++++--- src/Common/SLRUCachePolicy.h | 6 ++--- src/Common/ThreadProfileEvents.cpp | 1 + src/Common/ThreadProfileEvents.h | 4 +++- src/Common/ThreadStatus.cpp | 2 ++ src/Common/ZooKeeper/Common.h | 2 -- src/Common/ZooKeeper/ZooKeeper.cpp | 1 + src/Common/ZooKeeper/ZooKeeper.h | 1 - src/Common/ZooKeeper/ZooKeeperLock.cpp | 2 ++ src/Common/ZooKeeper/ZooKeeperLock.h | 3 ++- .../ZooKeeper/ZooKeeperWithFaultInjection.h | 1 + src/Common/scope_guard_safe.h | 1 - src/Coordination/KeeperDispatcher.cpp | 1 + src/Coordination/KeeperDispatcher.h | 1 - src/Coordination/KeeperLogStore.cpp | 1 + src/Coordination/KeeperLogStore.h | 1 - src/Coordination/KeeperSnapshotManagerS3.h | 1 - src/Coordination/KeeperStateMachine.cpp | 3 ++- src/Coordination/KeeperStateMachine.h | 1 - src/Coordination/KeeperStateManager.cpp | 1 + src/Coordination/ZooKeeperDataReader.cpp | 1 + src/Coordination/ZooKeeperDataReader.h | 1 - src/Core/PostgreSQL/PoolWithFailover.cpp | 1 + src/Core/PostgreSQL/PoolWithFailover.h | 1 - src/Core/SortDescription.cpp | 1 + src/Daemon/BaseDaemon.h | 1 - src/Dictionaries/CacheDictionary.h | 1 - src/Dictionaries/ExecutableDictionarySource.h | 1 - .../ExecutablePoolDictionarySource.h | 1 - src/Dictionaries/IPAddressDictionary.cpp | 1 + src/Dictionaries/IPAddressDictionary.h | 1 - .../PostgreSQLDictionarySource.cpp | 1 + 
src/Dictionaries/PostgreSQLDictionarySource.h | 1 - src/Disks/DiskLocal.h | 1 - .../IO/CachedOnDiskReadBufferFromFile.cpp | 3 ++- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 1 - src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 1 - src/Disks/StoragePolicy.cpp | 1 + src/Disks/StoragePolicy.h | 1 - src/Functions/FunctionsDecimalArithmetics.h | 1 - src/Functions/array/arrayEnumerateExtended.h | 1 + src/Functions/array/arrayEnumerateRanked.h | 1 + src/Functions/geometryConverters.h | 1 - src/IO/S3/AWSLogger.cpp | 3 +++ src/IO/S3/AWSLogger.h | 4 +++- src/IO/S3/Client.h | 1 - src/IO/S3/Credentials.h | 1 - src/IO/WriteBufferFromS3.h | 1 - src/Interpreters/Aggregator.cpp | 23 ++++++++++--------- src/Interpreters/Aggregator.h | 1 - src/Interpreters/Cache/FileCache.h | 1 - .../Cache/LRUFileCachePriority.cpp | 1 + src/Interpreters/Cache/LRUFileCachePriority.h | 1 - src/Interpreters/DNSCacheUpdater.cpp | 1 + src/Interpreters/DirectJoin.cpp | 4 ++-- src/Interpreters/DirectJoin.h | 1 - src/Interpreters/ExternalLoader.cpp | 3 ++- src/Interpreters/ExternalLoader.h | 3 ++- src/Interpreters/FilesystemCacheLog.h | 1 - src/Interpreters/FullSortingMergeJoin.h | 1 + src/Interpreters/HashJoin.h | 1 - src/Interpreters/IJoin.h | 1 - src/Interpreters/InterserverCredentials.h | 1 - src/Interpreters/MergeJoin.cpp | 1 + src/Interpreters/Set.h | 1 - src/Interpreters/TableJoin.h | 1 - src/Interpreters/ThreadStatusExt.cpp | 1 + src/Planner/Planner.cpp | 1 + .../Executors/CompletedPipelineExecutor.cpp | 2 +- src/Processors/Executors/PipelineExecutor.cpp | 1 + src/Processors/Executors/PipelineExecutor.h | 1 - .../PushingAsyncPipelineExecutor.cpp | 3 ++- src/Processors/PingPongProcessor.h | 1 - .../CreateSetAndFilterOnTheFlyStep.cpp | 1 + src/Processors/Sources/ShellCommandSource.h | 1 - .../AggregatingInOrderTransform.cpp | 1 + .../Transforms/AggregatingTransform.cpp | 1 + .../Transforms/AggregatingTransform.h | 1 + .../Transforms/CreatingSetsTransform.cpp | 1 + .../Transforms/MemoryBoundMerging.h | 1 - .../Transforms/MergeJoinTransform.cpp | 10 ++++++++ .../Transforms/MergeJoinTransform.h | 10 +------- .../Transforms/MergeSortingTransform.cpp | 1 + .../Transforms/MergeSortingTransform.h | 1 - .../Transforms/MergingAggregatedTransform.cpp | 1 + src/Server/HTTPHandlerFactory.h | 1 - src/Server/PostgreSQLHandler.h | 1 - src/Server/ProxyV1Handler.cpp | 1 + src/Server/ProxyV1Handler.h | 1 - src/Storages/Cache/RemoteCacheController.h | 1 - .../FileLog/FileLogDirectoryWatcher.cpp | 1 + .../FileLog/FileLogDirectoryWatcher.h | 1 - src/Storages/HDFS/StorageHDFS.h | 1 - src/Storages/Hive/StorageHive.h | 1 - src/Storages/KVStorageUtils.h | 1 - src/Storages/MergeTree/AsyncBlockIDsCache.h | 1 - src/Storages/MergeTree/DataPartsExchange.cpp | 1 + src/Storages/MergeTree/DataPartsExchange.h | 4 ++-- src/Storages/MergeTree/IDataPartStorage.h | 1 - .../MergeTree/IMergedBlockOutputStream.cpp | 1 + .../MergeTree/MergeTreeBackgroundExecutor.cpp | 3 ++- .../MergeTree/MergeTreeBackgroundExecutor.h | 3 +-- .../MergeTreeBaseSelectProcessor.cpp | 1 + .../MergeTreeInOrderSelectProcessor.h | 1 + .../MergeTree/MergeTreePartsMover.cpp | 1 + .../MergeTree/MergeTreePrefetchedReadPool.cpp | 1 + .../MergeTree/MergeTreeRangeReader.cpp | 1 + src/Storages/MergeTree/MergeTreeRangeReader.h | 1 - src/Storages/MergeTree/MergeTreeReadPool.cpp | 3 ++- .../MergeTreeReverseSelectProcessor.h | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 1 + .../MergeTree/MergeTreeWriteAheadLog.cpp | 3 ++- .../MergeTree/MergedBlockOutputStream.cpp | 1 + 
.../PartMovesBetweenShardsOrchestrator.h | 1 - src/Storages/MergeTree/PartitionPruner.cpp | 1 + .../MergeTree/ReplicatedMergeMutateTaskBase.h | 1 - .../ReplicatedMergeTreeAttachThread.h | 1 - .../ReplicatedMergeTreeCleanupThread.h | 1 - .../ReplicatedMergeTreePartCheckThread.h | 1 - .../ReplicatedMergeTreeRestartingThread.h | 1 - .../MaterializedPostgreSQLConsumer.h | 1 - src/Storages/StorageDistributed.h | 1 - src/Storages/StorageExecutable.h | 1 - src/Storages/StorageFile.h | 1 - src/Storages/StorageKeeperMap.h | 1 + src/Storages/StorageS3.h | 1 - src/Storages/StorageSet.cpp | 1 + src/Storages/System/StorageSystemReplicas.cpp | 1 + 150 files changed, 142 insertions(+), 130 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 2c54293eeec..5074e491f60 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index 78f7e9fcefa..30e107bc4db 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index f51ec423c69..b30f5ff5220 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/src/BridgeHelper/IBridgeHelper.h b/src/BridgeHelper/IBridgeHelper.h index d4762087cc1..272d97c8a78 100644 --- a/src/BridgeHelper/IBridgeHelper.h +++ b/src/BridgeHelper/IBridgeHelper.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index 00a661a1fc4..44104f26f63 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 6643a94c3bc..11bba4f1448 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -22,7 +22,8 @@ #include #include #include -#include "Core/Block.h" +#include +#include #include #include #include diff --git a/src/Client/Connection.h b/src/Client/Connection.h index b86567e2ed0..5f79b365199 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -1,6 +1,5 @@ #pragma once -#include #include diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 712ff5f5a31..4fb80f0ea04 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 3e6fc007fb9..fb8f9003364 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 99073d79bcd..32d54b7644e 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include 
#include diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h index 4ae313d7ecf..b176170cc1f 100644 --- a/src/Common/CacheBase.h +++ b/src/Common/CacheBase.h @@ -12,7 +12,6 @@ #include #include -#include #include diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index b632ea95928..5bbc8eae0de 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index aa8ac71446f..0ca3e46db88 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -16,9 +16,10 @@ #include #include #include -#include +namespace Poco { class Logger; } + namespace zkutil { class ZooKeeperNodeCache; diff --git a/src/Common/Config/YAMLParser.h b/src/Common/Config/YAMLParser.h index b986fc2d895..a00972b813c 100644 --- a/src/Common/Config/YAMLParser.h +++ b/src/Common/Config/YAMLParser.h @@ -2,11 +2,11 @@ #include "config.h" -#include - +#include +#include +#include #include -#include "Poco/DOM/AutoPtr.h" -#include +#include #if USE_YAML_CPP diff --git a/src/Common/Config/configReadClient.cpp b/src/Common/Config/configReadClient.cpp index e5308bc3bc7..44d338c07af 100644 --- a/src/Common/Config/configReadClient.cpp +++ b/src/Common/Config/configReadClient.cpp @@ -4,6 +4,7 @@ #include "ConfigProcessor.h" #include #include +#include namespace fs = std::filesystem; diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 81e2624d6db..b6a68bdfb45 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index a05456d3de8..84715b392a8 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -5,9 +5,10 @@ #include #include #include -#include +namespace Poco { class Logger; } + namespace DB { diff --git a/src/Common/ErrorHandlers.h b/src/Common/ErrorHandlers.h index f55b6c83a69..301377bff83 100644 --- a/src/Common/ErrorHandlers.h +++ b/src/Common/ErrorHandlers.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index aa6b9c90a4c..a6e37654ff1 100644 --- a/src/Common/FileChecker.cpp +++ b/src/Common/FileChecker.cpp @@ -1,5 +1,7 @@ #include #include +#include +#include #include #include #include @@ -25,7 +27,9 @@ FileChecker::FileChecker(const String & file_info_path_) : FileChecker(nullptr, { } -FileChecker::FileChecker(DiskPtr disk_, const String & file_info_path_) : disk(std::move(disk_)) +FileChecker::FileChecker(DiskPtr disk_, const String & file_info_path_) + : disk(std::move(disk_)) + , log(&Poco::Logger::get("FileChecker")) { setPath(file_info_path_); try diff --git a/src/Common/FileChecker.h b/src/Common/FileChecker.h index 1beab31ec8f..bb0383e4b56 100644 --- a/src/Common/FileChecker.h +++ b/src/Common/FileChecker.h @@ -1,8 +1,10 @@ #pragma once -#include #include +#include +#include +namespace Poco { class Logger; } namespace DB { @@ -46,7 +48,7 @@ private: size_t getRealFileSize(const String & path_) const; const DiskPtr disk; - const Poco::Logger * log = &Poco::Logger::get("FileChecker"); + const Poco::Logger * log; String files_info_path; std::map map; diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h index 4aee2135af7..49c2fc4541b 100644 --- 
a/src/Common/LRUCachePolicy.h
+++ b/src/Common/LRUCachePolicy.h
@@ -5,8 +5,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
 /// Cache policy LRU evicts entries which are not used for a long time.
@@ -174,7 +172,7 @@ private:
         auto it = cells.find(key);
         if (it == cells.end())
         {
-            LOG_ERROR(&Poco::Logger::get("LRUCache"), "LRUCache became inconsistent. There must be a bug in it.");
+            // Queue became inconsistent
             abort();
         }
@@ -192,7 +190,7 @@
         if (current_size_in_bytes > (1ull << 63))
         {
-            LOG_ERROR(&Poco::Logger::get("LRUCache"), "LRUCache became inconsistent. There must be a bug in it.");
+            // Queue became inconsistent
             abort();
         }
     }
diff --git a/src/Common/OvercommitTracker.h b/src/Common/OvercommitTracker.h
index 598b877ef3c..f40a70fe7cd 100644
--- a/src/Common/OvercommitTracker.h
+++ b/src/Common/OvercommitTracker.h
@@ -1,7 +1,7 @@
 #pragma once
 
-#include
 #include
+#include
 #include
 #include
 #include
diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h
index 96a18ee6591..a9c595c440c 100644
--- a/src/Common/PoolBase.h
+++ b/src/Common/PoolBase.h
@@ -144,12 +144,17 @@ public:
                 return Entry(*items.back());
             }
 
-            LOG_INFO(log, "No free connections in pool. Waiting.");
-
             if (timeout < 0)
+            {
+                LOG_INFO(log, "No free connections in pool. Waiting indefinitely.");
                 available.wait(lock);
+            }
             else
-                available.wait_for(lock, std::chrono::microseconds(timeout));
+            {
+                auto timeout_us = std::chrono::microseconds(timeout);
+                LOG_INFO(log, "No free connections in pool. Waiting {} us.", timeout_us.count());
+                available.wait_for(lock, timeout_us);
+            }
         }
     }
diff --git a/src/Common/SLRUCachePolicy.h b/src/Common/SLRUCachePolicy.h
index e36bca83c61..810c8c335a0 100644
--- a/src/Common/SLRUCachePolicy.h
+++ b/src/Common/SLRUCachePolicy.h
@@ -5,8 +5,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
 
@@ -236,7 +234,7 @@ private:
         auto it = cells.find(key);
         if (it == cells.end())
        {
-            LOG_ERROR(&Poco::Logger::get("SLRUCache"), "SLRUCache became inconsistent.
There must be a bug in it."); + // Queue became inconsistent abort(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 76a4d8b1adf..a94fd81559a 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -23,6 +23,7 @@ #include #include +#include namespace ProfileEvents diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 5a03a9e8555..c4703a69998 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -2,11 +2,13 @@ #include #include +#include +#include #include #include #include #include -#include +#include #if defined(OS_LINUX) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 1b783aa9ec4..ac6cfdc2cd7 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include diff --git a/src/Common/ZooKeeper/Common.h b/src/Common/ZooKeeper/Common.h index a2956706c8f..1a1328588e3 100644 --- a/src/Common/ZooKeeper/Common.h +++ b/src/Common/ZooKeeper/Common.h @@ -3,12 +3,10 @@ #include #include -#include namespace zkutil { using GetZooKeeper = std::function; -using GetZooKeeperWithFaultInjection = std::function; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 78f9e3da83b..a8da0dff0cc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -15,6 +15,7 @@ #include "Common/ZooKeeper/IKeeper.h" #include #include +#include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index fc4fe95d9b6..8e7639b8cc1 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeperLock.cpp b/src/Common/ZooKeeper/ZooKeeperLock.cpp index a52c942a35f..6ee1c380efb 100644 --- a/src/Common/ZooKeeper/ZooKeeperLock.cpp +++ b/src/Common/ZooKeeper/ZooKeeperLock.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include namespace DB diff --git a/src/Common/ZooKeeper/ZooKeeperLock.h b/src/Common/ZooKeeper/ZooKeeperLock.h index 755ca1333b8..146527c6c94 100644 --- a/src/Common/ZooKeeper/ZooKeeperLock.h +++ b/src/Common/ZooKeeper/ZooKeeperLock.h @@ -3,7 +3,8 @@ #include #include #include -#include + +namespace Poco { class Logger; } namespace zkutil { diff --git a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h index a39a083cf33..130590ceb40 100644 --- a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h +++ b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB diff --git a/src/Common/scope_guard_safe.h b/src/Common/scope_guard_safe.h index f098fd95f00..2befb58870a 100644 --- a/src/Common/scope_guard_safe.h +++ b/src/Common/scope_guard_safe.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include /// Same as SCOPE_EXIT() but block the MEMORY_LIMIT_EXCEEDED errors. 
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 1828182751d..2cde7c2465e 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 77b5510cbb3..4b8b134cf8f 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index d1bd2f9db18..10619a44517 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index 108241e024e..e48e3a32463 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -4,7 +4,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/Coordination/KeeperSnapshotManagerS3.h b/src/Coordination/KeeperSnapshotManagerS3.h index 197f528b192..eff7868bba9 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.h +++ b/src/Coordination/KeeperSnapshotManagerS3.h @@ -9,7 +9,6 @@ #if USE_AWS_S3 #include #include -#include #include #endif diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 632aaec6b54..7c55739a96f 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -8,9 +8,10 @@ #include #include #include -#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include #include #include +#include #include "Coordination/KeeperStorage.h" diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index e4f0295db99..5af5bc05b0f 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -8,7 +8,6 @@ #include #include -#include namespace DB diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index cfb3519e597..70687ba471c 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 5fa67a60b4b..2d4f64e033f 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/src/Coordination/ZooKeeperDataReader.h b/src/Coordination/ZooKeeperDataReader.h index 6da6fd498af..8fd86ba99e2 100644 --- a/src/Coordination/ZooKeeperDataReader.h +++ b/src/Coordination/ZooKeeperDataReader.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include namespace DB { diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 22cd88c0764..3655681c515 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index afef2933d29..bf9c34e6723 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" 
#include #include -#include #include #include diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index 66ca1539b71..ae6aedf202d 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #if USE_EMBEDDED_COMPILER #include diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index f90f403364b..7aa1e8ad1a0 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h index e19c4a66b1f..5203415005f 100644 --- a/src/Dictionaries/CacheDictionary.h +++ b/src/Dictionaries/CacheDictionary.h @@ -9,7 +9,6 @@ #include -#include #include #include diff --git a/src/Dictionaries/ExecutableDictionarySource.h b/src/Dictionaries/ExecutableDictionarySource.h index 0456d3cafef..c7067a62893 100644 --- a/src/Dictionaries/ExecutableDictionarySource.h +++ b/src/Dictionaries/ExecutableDictionarySource.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index 1fc10d18b76..e8cc6e83406 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 803f607a3a7..ff1c784750b 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index e1fabb89a7e..67827c6524e 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -11,7 +11,6 @@ #include #include #include -#include #include "DictionaryStructure.h" #include "IDictionary.h" #include "IDictionarySource.h" diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 9f254da0b11..8ec78308392 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #endif diff --git a/src/Dictionaries/PostgreSQLDictionarySource.h b/src/Dictionaries/PostgreSQLDictionarySource.h index 8ecf56a9430..1305333458b 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.h +++ b/src/Dictionaries/PostgreSQLDictionarySource.h @@ -8,7 +8,6 @@ #include "ExternalQueryBuilder.h" #include #include -#include #include diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 14e29904422..d6182463ebf 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 5eaee2e3026..f48935da7bf 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -4,8 +4,9 @@ #include #include #include -#include #include +#include +#include #include diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 14e8ea6c7e7..d3c265a522b 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ 
b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index bcdc97983be..b49f4dafef0 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -9,7 +9,6 @@ #include #include #include -#include namespace DB diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index ec0f201b801..f4be8b8fe86 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include diff --git a/src/Disks/StoragePolicy.h b/src/Disks/StoragePolicy.h index 7e6aff7bbda..69cfb830818 100644 --- a/src/Disks/StoragePolicy.h +++ b/src/Disks/StoragePolicy.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include diff --git a/src/Functions/FunctionsDecimalArithmetics.h b/src/Functions/FunctionsDecimalArithmetics.h index aa4afc68707..79e10d215a9 100644 --- a/src/Functions/FunctionsDecimalArithmetics.h +++ b/src/Functions/FunctionsDecimalArithmetics.h @@ -10,7 +10,6 @@ #include #include -#include #include #include diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index 3f145c05b54..cf38afcfa5a 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 8a348c07421..0733f1e2d43 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 85e887dd498..97162fa9dd0 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -18,7 +18,6 @@ #include #include -#include namespace DB { diff --git a/src/IO/S3/AWSLogger.cpp b/src/IO/S3/AWSLogger.cpp index 48c30ccf881..d6162823aee 100644 --- a/src/IO/S3/AWSLogger.cpp +++ b/src/IO/S3/AWSLogger.cpp @@ -2,7 +2,10 @@ #if USE_AWS_S3 +#include +#include #include +#include namespace { diff --git a/src/IO/S3/AWSLogger.h b/src/IO/S3/AWSLogger.h index 897c0e8d964..fdb6eed1f86 100644 --- a/src/IO/S3/AWSLogger.h +++ b/src/IO/S3/AWSLogger.h @@ -4,8 +4,10 @@ #if USE_AWS_S3 #include +#include +#include -#include +namespace Poco { class Logger; } namespace DB::S3 { diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 5c68fca6f04..369558ae105 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -4,7 +4,6 @@ #if USE_AWS_S3 -#include #include #include diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index cd9072f9765..22cca76109f 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -9,7 +9,6 @@ # include # include -# include # include diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 7c3c439f9c3..5fa934b886e 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index d6fbf072d05..03227d8663a 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -8,9 +8,6 @@ #endif #include -#include -#include -#include 
#include #include #include @@ -21,14 +18,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -37,6 +26,18 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index e2f76f31466..5fb94c5f4e8 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index afafa39c4c6..85c91e8033e 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index c20379e1fc1..4987d143f52 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace CurrentMetrics { diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 2345d3c47db..2a3c4f3c801 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { diff --git a/src/Interpreters/DNSCacheUpdater.cpp b/src/Interpreters/DNSCacheUpdater.cpp index bf88d19b7ef..c6a38d85acb 100644 --- a/src/Interpreters/DNSCacheUpdater.cpp +++ b/src/Interpreters/DNSCacheUpdater.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace DB diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index e148db1d8e6..cfefd7c5a91 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -1,7 +1,7 @@ #include - -#include #include +#include +#include namespace DB { diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index bdbd155dc36..644b66a9d99 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -1,6 +1,5 @@ #pragma once -#include #include diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 04a116ec0c7..05418003dba 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -9,8 +9,9 @@ #include #include #include -#include #include +#include +#include #include #include #include diff --git a/src/Interpreters/ExternalLoader.h b/src/Interpreters/ExternalLoader.h index 3ce7c40ef03..49b4ea77e0d 100644 --- a/src/Interpreters/ExternalLoader.h +++ b/src/Interpreters/ExternalLoader.h @@ -6,10 +6,11 @@ #include #include #include -#include #include #include +#include +namespace Poco { class Logger; } namespace DB { diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index bf5361ef324..1b22d561c51 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB { diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index a94d7a7dfc6..7318d1d24a1 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB diff --git 
index b29b6e617c8..79fac60d8bc 100644
--- a/src/Interpreters/HashJoin.h
+++ b/src/Interpreters/HashJoin.h
@@ -16,7 +16,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h
index 69d69ce30a6..83067b0eab7 100644
--- a/src/Interpreters/IJoin.h
+++ b/src/Interpreters/IJoin.h
@@ -7,7 +7,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Interpreters/InterserverCredentials.h b/src/Interpreters/InterserverCredentials.h
index c4370c83e9b..33176c943a4 100644
--- a/src/Interpreters/InterserverCredentials.h
+++ b/src/Interpreters/InterserverCredentials.h
@@ -2,7 +2,6 @@
 #include
 #include
-#include
 #include
 namespace DB

diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp
index 07f5ae31ed6..d31510c2fb5 100644
--- a/src/Interpreters/MergeJoin.cpp
+++ b/src/Interpreters/MergeJoin.cpp
@@ -3,6 +3,7 @@
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h
index 00eff614c7c..e593f71cfbe 100644
--- a/src/Interpreters/Set.h
+++ b/src/Interpreters/Set.h
@@ -8,7 +8,6 @@
 #include
 #include
-#include
 namespace DB

diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h
index 95471885a2a..99b683b7713 100644
--- a/src/Interpreters/TableJoin.h
+++ b/src/Interpreters/TableJoin.h
@@ -19,7 +19,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index 72ce08bf653..2e051030e38 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #if defined(OS_LINUX)

diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index a8e0d80ce8c..08efa6653a7 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -1,6 +1,7 @@
 #include
 #include
+#include
 #include

diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp
index e624ecd52de..acbb4098145 100644
--- a/src/Processors/Executors/CompletedPipelineExecutor.cpp
+++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp
@@ -5,8 +5,8 @@
 #include
 #include
 #include
-#include
 #include
+#include
 namespace DB
 {

diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp
index 994051fd697..441a66b16d7 100644
--- a/src/Processors/Executors/PipelineExecutor.cpp
+++ b/src/Processors/Executors/PipelineExecutor.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h
index 147e5b2744a..e6d33e2849b 100644
--- a/src/Processors/Executors/PipelineExecutor.h
+++ b/src/Processors/Executors/PipelineExecutor.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp
index 3aec7608e6d..e026dbc50d6 100644
--- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp
+++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp
@@ -5,8 +5,9 @@
 #include
 #include
 #include
-#include
 #include
+#include
+#include
 namespace DB
 {

diff --git a/src/Processors/PingPongProcessor.h b/src/Processors/PingPongProcessor.h
index 0f2fffbd70f..976b29f0d8a 100644
--- a/src/Processors/PingPongProcessor.h
+++ b/src/Processors/PingPongProcessor.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
index 07137e87736..fe362f64b96 100644
--- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
+++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Processors/Sources/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h
index 80ba1d59adf..b9afaa345cf 100644
--- a/src/Processors/Sources/ShellCommandSource.h
+++ b/src/Processors/Sources/ShellCommandSource.h
@@ -2,7 +2,6 @@
 #include
-#include
 #include
 #include

diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
index 95e9b953840..3d5a910f054 100644
--- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp
+++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index 69eeb76bf53..4bd000797a6 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include

diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h
index 048b69adae6..beb23a91c88 100644
--- a/src/Processors/Transforms/AggregatingTransform.h
+++ b/src/Processors/Transforms/AggregatingTransform.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 namespace CurrentMetrics
 {

diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp
index 6a8d08cb661..f65e72c2723 100644
--- a/src/Processors/Transforms/CreatingSetsTransform.cpp
+++ b/src/Processors/Transforms/CreatingSetsTransform.cpp
@@ -6,6 +6,7 @@
 #include
 #include
+#include
 #include

diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h
index 3193a07a0bd..607087fb39c 100644
--- a/src/Processors/Transforms/MemoryBoundMerging.h
+++ b/src/Processors/Transforms/MemoryBoundMerging.h
@@ -6,7 +6,6 @@
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp
index abeef0f9a47..46abd5c891d 100644
--- a/src/Processors/Transforms/MergeJoinTransform.cpp
+++ b/src/Processors/Transforms/MergeJoinTransform.cpp
@@ -302,6 +302,16 @@ MergeJoinAlgorithm::MergeJoinAlgorithm(
     }
 }
 
+void MergeJoinAlgorithm::logElapsed(double seconds)
+{
+    LOG_TRACE(log,
+        "Finished pocessing in {} seconds"
+        ", left: {} blocks, {} rows; right: {} blocks, {} rows"
+        ", max blocks loaded to memory: {}",
+        seconds, stat.num_blocks[0], stat.num_rows[0], stat.num_blocks[1], stat.num_rows[1],
+        stat.max_blocks_loaded);
+}
+
 static void prepareChunk(Chunk & chunk)
 {
     if (!chunk)

diff --git a/src/Processors/Transforms/MergeJoinTransform.h b/src/Processors/Transforms/MergeJoinTransform.h
index 58ac652cb18..6bf4484df24 100644
--- a/src/Processors/Transforms/MergeJoinTransform.h
+++ b/src/Processors/Transforms/MergeJoinTransform.h
@@ -233,15 +233,7 @@ public:
     virtual void consume(Input & input, size_t source_num) override;
     virtual Status merge() override;
 
-    void logElapsed(double seconds)
-    {
-        LOG_TRACE(log,
-            "Finished pocessing in {} seconds"
-            ", left: {} blocks, {} rows; right: {} blocks, {} rows"
-            ", max blocks loaded to memory: {}",
-            seconds, stat.num_blocks[0], stat.num_rows[0], stat.num_blocks[1], stat.num_rows[1],
-            stat.max_blocks_loaded);
-    }
+    void logElapsed(double seconds);
 
 private:
     std::optional handleAnyJoinState();

diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp
index eebdd678a4b..ecf14a81c00 100644
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Processors/Transforms/MergeSortingTransform.h b/src/Processors/Transforms/MergeSortingTransform.h
index c64c93393ce..e8c180b6903 100644
--- a/src/Processors/Transforms/MergeSortingTransform.h
+++ b/src/Processors/Transforms/MergeSortingTransform.h
@@ -5,7 +5,6 @@
 #include
 #include
 #include
-#include
 namespace DB

diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp
index 9d0be86ff83..e4955d06859 100644
--- a/src/Processors/Transforms/MergingAggregatedTransform.cpp
+++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 namespace DB
 {

diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h
index ebdfa954bf7..76460dcc4a8 100644
--- a/src/Server/HTTPHandlerFactory.h
+++ b/src/Server/HTTPHandlerFactory.h
@@ -6,7 +6,6 @@
 #include
 #include
 #include
-#include
 #include

diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h
index 6fc128e3883..f20af3df02c 100644
--- a/src/Server/PostgreSQLHandler.h
+++ b/src/Server/PostgreSQLHandler.h
@@ -4,7 +4,6 @@
 #include "config.h"
 #include
 #include
-#include
 #include "IServer.h"
 #if USE_SSL

diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp
index cd5fe29112a..56621940a23 100644
--- a/src/Server/ProxyV1Handler.cpp
+++ b/src/Server/ProxyV1Handler.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 #include

diff --git a/src/Server/ProxyV1Handler.h b/src/Server/ProxyV1Handler.h
index e56f4cd3545..b50c2acbc55 100644
--- a/src/Server/ProxyV1Handler.h
+++ b/src/Server/ProxyV1Handler.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 namespace DB

diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h
index 18732acc273..fafe363bbd4 100644
--- a/src/Storages/Cache/RemoteCacheController.h
+++ b/src/Storages/Cache/RemoteCacheController.h
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp
index 3c5342a1e83..9d488616e85 100644
--- a/src/Storages/FileLog/FileLogDirectoryWatcher.cpp
+++ b/src/Storages/FileLog/FileLogDirectoryWatcher.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 namespace DB
 {

diff --git a/src/Storages/FileLog/FileLogDirectoryWatcher.h b/src/Storages/FileLog/FileLogDirectoryWatcher.h
index 4368f284c9d..9b7afcf8e12 100644
--- a/src/Storages/FileLog/FileLogDirectoryWatcher.h
+++ b/src/Storages/FileLog/FileLogDirectoryWatcher.h
@@ -2,7 +2,6 @@
 #include
-#include
 #include
 #include

diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h
index 585f5df6ceb..b123834e981 100644
--- a/src/Storages/HDFS/StorageHDFS.h
+++ b/src/Storages/HDFS/StorageHDFS.h
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h
index 363042621c7..b4afb2421b1 100644
--- a/src/Storages/Hive/StorageHive.h
+++ b/src/Storages/Hive/StorageHive.h
@@ -7,7 +7,6 @@
 #include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/KVStorageUtils.h b/src/Storages/KVStorageUtils.h
index 0574539f4c7..c3bb2aefa62 100644
--- a/src/Storages/KVStorageUtils.h
+++ b/src/Storages/KVStorageUtils.h
@@ -1,6 +1,5 @@
 #pragma once
-#include
 #include
 #include

diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.h b/src/Storages/MergeTree/AsyncBlockIDsCache.h
index a661d00f8a6..91d549a0501 100644
--- a/src/Storages/MergeTree/AsyncBlockIDsCache.h
+++ b/src/Storages/MergeTree/AsyncBlockIDsCache.h
@@ -1,7 +1,6 @@
 #pragma once
 #include
-#include
 #include
 #include

diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp
index c6efe9c9589..a80870448f1 100644
--- a/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/src/Storages/MergeTree/DataPartsExchange.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h
index 20c15039a2d..57fd0d5cff0 100644
--- a/src/Storages/MergeTree/DataPartsExchange.h
+++ b/src/Storages/MergeTree/DataPartsExchange.h
@@ -1,13 +1,12 @@
 #pragma once
-#include "Storages/MergeTree/MergeTreePartInfo.h"
+#include
 #include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
@@ -21,6 +20,7 @@ namespace DB
 {
 class StorageReplicatedMergeTree;
+class PooledReadWriteBufferFromHTTP;
 namespace DataPartsExchange
 {

diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h
index 4d7212eb17b..2d61f9ee77b 100644
--- a/src/Storages/MergeTree/IDataPartStorage.h
+++ b/src/Storages/MergeTree/IDataPartStorage.h
@@ -11,7 +11,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp
index 37da6014d1b..21bead2864a 100644
--- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp
index f2efbb2ba8d..6f1e41d2791 100644
--- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp
@@ -1,12 +1,13 @@
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
+#include
 namespace CurrentMetrics

diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h
index 1ed4fc2dabc..9bfea32c7f9 100644
--- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h
+++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h
@@ -9,12 +9,11 @@
 #include
 #include
-
 #include
 #include
+#include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index a41b697f9e6..24980cdb82c 100644
--- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h
index f7c3f294658..da332c64232 100644
--- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h
@@ -1,5 +1,6 @@
 #pragma once
 #include
+#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index 6f83a82e4e6..e1da57744b3 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
index 114039f7ff7..e184f84528b 100644
--- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
+++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
index 5cba3e34169..3f8a90ca175 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.h b/src/Storages/MergeTree/MergeTreeRangeReader.h
index 5ffd464cfe2..f180787c869 100644
--- a/src/Storages/MergeTree/MergeTreeRangeReader.h
+++ b/src/Storages/MergeTree/MergeTreeRangeReader.h
@@ -1,6 +1,5 @@
 #pragma once
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp
index 54a040724fc..29be06b4e6a 100644
--- a/src/Storages/MergeTree/MergeTreeReadPool.cpp
+++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp
@@ -1,8 +1,9 @@
 #include
 #include
 #include
-#include "Common/Stopwatch.h"
+#include
 #include
+#include
 #include

diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
index fd25748050a..618f2855695 100644
--- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
@@ -1,5 +1,6 @@
 #pragma once
 #include
+#include
 namespace DB

diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 4539e0b36c5..ef6013f5c65 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
index fabf2acdad3..93e07a38bb1 100644
--- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
+++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
@@ -4,15 +4,16 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include "Storages/MergeTree/DataPartStorageOnDiskFull.h"
 #include
 namespace DB

diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index ced43ae25b0..715c218c2db 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 namespace DB

diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h
index 24454b897af..af21022953c 100644
--- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h
+++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h
@@ -1,7 +1,6 @@
 #pragma once
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp
index 61293888f10..35b2d5db3b5 100644
--- a/src/Storages/MergeTree/PartitionPruner.cpp
+++ b/src/Storages/MergeTree/PartitionPruner.cpp
@@ -1,4 +1,5 @@
 #include
+#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
index fb8f01437ef..a7bf1290274 100644
--- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
+++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h
@@ -1,6 +1,5 @@
 #pragma once
-#include
 #include
 #include

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
index a491a06d6a5..222b30b519b 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 namespace DB
 {

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index 35838625bbe..76b9ee4a575 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
index 170b2ca1f60..e7b0e224d9b 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
index b5314de9dcc..8f5721e440d 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h
@@ -1,7 +1,6 @@
 #pragma once
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
index 3412e6e422f..02cbedb4dd5 100644
--- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
+++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h
@@ -5,7 +5,6 @@
 #include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index 3a7fae44708..417fb6447bf 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 #include
 #include

diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h
index 2393920fa3c..37455385675 100644
--- a/src/Storages/StorageExecutable.h
+++ b/src/Storages/StorageExecutable.h
@@ -1,6 +1,5 @@
 #pragma once
-#include
 #include
 #include
 #include

diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 03b3aacb67f..8d622fdf3a2 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -3,7 +3,6 @@
 #include
 #include
-#include
 #include
 #include

diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h
index f71ff3cc65a..552e6b35fe8 100644
--- a/src/Storages/StorageKeeperMap.h
+++ b/src/Storages/StorageKeeperMap.h
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include
 #include
 #include

diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h
index f4d915e9c55..982d4e4681e 100644
--- a/src/Storages/StorageS3.h
+++ b/src/Storages/StorageS3.h
@@ -14,7 +14,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include

diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp
index 7c5ba497ec9..e11b71d8777 100644
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include

diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp
index 240d452fe29..8832d9d4983 100644
--- a/src/Storages/System/StorageSystemReplicas.cpp
+++ b/src/Storages/System/StorageSystemReplicas.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 #include

From 8b0adf58e52d78c2fe0a734e383bd0b4e4d22c34 Mon Sep 17 00:00:00 2001
From: Thom O'Connor
Date: Mon, 10 Apr 2023 10:17:43 -0600
Subject: [PATCH 255/277] Update formats.md

Including an Avro example with DESCRIBE url in S3

---
 docs/en/interfaces/formats.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index de8357345db..157a852332f 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -1877,6 +1877,13 @@ Column names must:
 Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively.
 
+### Example Data {#example-data-avro}
+
+Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
+
+``` DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
+```
+
 ## AvroConfluent {#data-format-avro-confluent}
 
 AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
From 7cd4009c3d8811cc5f13244edbbf430da14da854 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 10 Apr 2023 20:23:56 +0200
Subject: [PATCH 256/277] Fix flakiness of test_store_cleanup in case of image
 rebuild

The log level will be substituted from "test" to "trace" in case of the
tag is not "latest", the assumption behind this I guess is that it
should not try to use "test" log level for older versions.

But, it could have per-PR image in case of changes in the Dockerfile,
so it is better to check for self.with_installed_binary, since actually
any parameters except this will use new clickhouse binary anyway.

CI: https://s3.amazonaws.com/clickhouse-test-reports/48596/a1272e8536265929255fdf5020836f057859e425/integration_tests__tsan__[1/6].html
Signed-off-by: Azat Khuzhin
---
 tests/integration/helpers/cluster.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index b2aedfce3ca..a5788ce36eb 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -4161,9 +4161,10 @@ class ClickHouseInstance:
             logging.debug("Copy common configuration from helpers")
         # The file is named with 0_ prefix to be processed before other configuration overloads.
         if self.copy_common_configs:
-            need_fix_log_level = self.tag != "latest"
             write_embedded_config(
-                "0_common_instance_config.xml", self.config_d_dir, need_fix_log_level
+                "0_common_instance_config.xml",
+                self.config_d_dir,
+                self.with_installed_binary,
             )
         write_embedded_config("0_common_instance_users.xml", users_d_dir)

From a6c99e8e843c324153d9ffeba8a67fb58a14ead9 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 10 Apr 2023 18:59:50 +0000
Subject: [PATCH 257/277] add IPv4 to supertype calculation

---
 src/DataTypes/getLeastSupertype.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 3c33289c304..783326c25e9 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include "Core/Types.h"
 #include
@@ -88,7 +89,7 @@ DataTypePtr getNumericType(const TypeIndexSet & types)
             maximize(max_bits_of_unsigned_integer, 8);
         else if (type == TypeIndex::UInt16)
             maximize(max_bits_of_unsigned_integer, 16);
-        else if (type == TypeIndex::UInt32)
+        else if (type == TypeIndex::UInt32 || type == TypeIndex::IPv4)
             maximize(max_bits_of_unsigned_integer, 32);
         else if (type == TypeIndex::UInt64)
             maximize(max_bits_of_unsigned_integer, 64);
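The patch above makes the supertype machinery treat `IPv4` as a 32-bit unsigned integer when it maximizes the required bit width across the input types. As a rough illustration of that width-maximizing idea, here is a simplified, self-contained sketch (hypothetical `TypeIndex` and `maxUnsignedBits`, not ClickHouse's actual `getNumericType`):

```cpp
#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <iostream>

// Simplified stand-in for ClickHouse's TypeIndex; only the cases relevant here.
enum class TypeIndex { UInt8, UInt16, UInt32, UInt64, IPv4 };

// Return the smallest unsigned width (in bits) that can hold every input type,
// counting IPv4 as a 32-bit unsigned value, as in the patch above.
size_t maxUnsignedBits(std::initializer_list<TypeIndex> types)
{
    size_t max_bits = 0;
    for (TypeIndex type : types)
    {
        if (type == TypeIndex::UInt8)
            max_bits = std::max(max_bits, size_t(8));
        else if (type == TypeIndex::UInt16)
            max_bits = std::max(max_bits, size_t(16));
        else if (type == TypeIndex::UInt32 || type == TypeIndex::IPv4)
            max_bits = std::max(max_bits, size_t(32)); // an IPv4 address fits in UInt32
        else if (type == TypeIndex::UInt64)
            max_bits = std::max(max_bits, size_t(64));
    }
    return max_bits;
}

int main()
{
    // IPv4 and UInt32 now share a 32-bit supertype, which is what lets
    // comparisons like toIPv4('127.0.0.10') = 2130706442::UInt32 resolve.
    std::cout << maxUnsignedBits({TypeIndex::IPv4, TypeIndex::UInt32}) << '\n'; // prints 32
}
```

The test added in patch 260 below exercises exactly this: mixed IPv4/UInt32 comparison operators.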
From e6d96c27f249936ae4caa92650788f7ff0febab4 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 10 Apr 2023 19:16:02 +0000
Subject: [PATCH 258/277] unnecessary include

---
 src/DataTypes/getLeastSupertype.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp
index 783326c25e9..9d42d82ce91 100644
--- a/src/DataTypes/getLeastSupertype.cpp
+++ b/src/DataTypes/getLeastSupertype.cpp
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include "Core/Types.h"
 #include

From 23a087945209ecb92f74f1fdc4c7c952f13841e9 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 10 Apr 2023 21:17:08 +0200
Subject: [PATCH 259/277] Remove strange code

---
 src/Storages/StorageFile.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h
index 03b3aacb67f..27cf40a0bf1 100644
--- a/src/Storages/StorageFile.h
+++ b/src/Storages/StorageFile.h
@@ -14,8 +14,6 @@ namespace DB
 class StorageFile final : public IStorage
 {
-friend class partitionedstoragefilesink;
-
 public:
     struct CommonArguments : public WithContext
     {

From 32174e9a6143e404643273d63e959357f998bd57 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 10 Apr 2023 22:47:59 +0000
Subject: [PATCH 260/277] test is added

---
 .../0_stateless/02713_ip4_uint_compare.reference      | 1 +
 tests/queries/0_stateless/02713_ip4_uint_compare.sql  | 10 ++++++++++
 2 files changed, 11 insertions(+)
 create mode 100644 tests/queries/0_stateless/02713_ip4_uint_compare.reference
 create mode 100644 tests/queries/0_stateless/02713_ip4_uint_compare.sql

diff --git a/tests/queries/0_stateless/02713_ip4_uint_compare.reference b/tests/queries/0_stateless/02713_ip4_uint_compare.reference
new file mode 100644
index 00000000000..fdc2de3fbcb
--- /dev/null
+++ b/tests/queries/0_stateless/02713_ip4_uint_compare.reference
@@ -0,0 +1 @@
+1 0 1 1 1 1 0

diff --git a/tests/queries/0_stateless/02713_ip4_uint_compare.sql b/tests/queries/0_stateless/02713_ip4_uint_compare.sql
new file mode 100644
index 00000000000..141fa1371b6
--- /dev/null
+++ b/tests/queries/0_stateless/02713_ip4_uint_compare.sql
@@ -0,0 +1,10 @@
+WITH toIPv4('127.0.0.10') AS ip
+SELECT
+    ip = 2130706442::UInt32,
+    ip = 0::UInt32,
+    ip < 2130706443::UInt32,
+    ip > 2130706441::UInt32,
+    ip <= 2130706442::UInt32,
+    ip >= 2130706442::UInt32,
+    ip != 2130706442::UInt32;
+    
\ No newline at end of file

From 7581982e0a6af04ad953bb68ae151fefdfc1593f Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 10 Apr 2023 22:50:09 +0000
Subject: [PATCH 261/277] fix test

---
 tests/queries/0_stateless/02713_ip4_uint_compare.sql | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/queries/0_stateless/02713_ip4_uint_compare.sql b/tests/queries/0_stateless/02713_ip4_uint_compare.sql
index 141fa1371b6..ec8d6584329 100644
--- a/tests/queries/0_stateless/02713_ip4_uint_compare.sql
+++ b/tests/queries/0_stateless/02713_ip4_uint_compare.sql
@@ -7,4 +7,3 @@ SELECT
     ip <= 2130706442::UInt32,
     ip >= 2130706442::UInt32,
     ip != 2130706442::UInt32;
-    
\ No newline at end of file

From 9de55d9ccc6d95daa9df63dccd8c94650634e463 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 11 Apr 2023 06:07:58 +0000
Subject: [PATCH 262/277] Rename tests to fix style check

---
 ...e_diff_aliases.reference => 02710_date_diff_aliases.reference} | 0
 .../{25342_date_diff_aliases.sql => 02710_date_diff_aliases.sql}  | 0
 ...{25343_trim_aliases.reference => 02711_trim_aliases.reference} | 0
 .../{25343_trim_aliases.sql => 02711_trim_aliases.sql}            | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/{25342_date_diff_aliases.reference => 02710_date_diff_aliases.reference} (100%)
 rename tests/queries/0_stateless/{25342_date_diff_aliases.sql => 02710_date_diff_aliases.sql} (100%)
 rename tests/queries/0_stateless/{25343_trim_aliases.reference => 02711_trim_aliases.reference} (100%)
 rename tests/queries/0_stateless/{25343_trim_aliases.sql => 02711_trim_aliases.sql} (100%)

diff --git a/tests/queries/0_stateless/25342_date_diff_aliases.reference b/tests/queries/0_stateless/02710_date_diff_aliases.reference
similarity index 100%
rename from tests/queries/0_stateless/25342_date_diff_aliases.reference
rename to tests/queries/0_stateless/02710_date_diff_aliases.reference

diff --git a/tests/queries/0_stateless/25342_date_diff_aliases.sql b/tests/queries/0_stateless/02710_date_diff_aliases.sql
similarity index 100%
rename from tests/queries/0_stateless/25342_date_diff_aliases.sql
rename to tests/queries/0_stateless/02710_date_diff_aliases.sql

diff --git a/tests/queries/0_stateless/25343_trim_aliases.reference b/tests/queries/0_stateless/02711_trim_aliases.reference
similarity index 100%
rename from tests/queries/0_stateless/25343_trim_aliases.reference
rename to tests/queries/0_stateless/02711_trim_aliases.reference

diff --git a/tests/queries/0_stateless/25343_trim_aliases.sql b/tests/queries/0_stateless/02711_trim_aliases.sql
similarity index 100%
rename from tests/queries/0_stateless/25343_trim_aliases.sql
rename to tests/queries/0_stateless/02711_trim_aliases.sql
From c3f976c58a4eec4b2cc280fe6ad0f62f2b54db07 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 11 Apr 2023 06:19:10 +0000
Subject: [PATCH 263/277] Move space check out of loop

---
 src/Functions/parseDateTime.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index 12e2e113af4..05d18a736e3 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -1055,8 +1055,11 @@ namespace
         static Pos mysqlMicrosecond(Pos cur, Pos end, const String & fragment, DateTime & /*date*/)
         {
+            checkSpace(cur, end, 6, "mysqlMicrosecond requires size >= 6", fragment);
+
             for (size_t i = 0; i < 6; ++i)
-                cur = assertNumber(cur, end, fragment);
+                cur = assertNumber(cur, end, fragment);
+
             return cur;
         }
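The change hoists an invariant bounds check out of the fixed-trip-count loop: instead of re-validating the remaining buffer on every digit, the parser verifies once up front that six bytes are available. A minimal sketch of the same pattern, with a hypothetical `parseSixDigits` rather than the actual ClickHouse function:

```cpp
#include <cstddef>
#include <stdexcept>

// Hypothetical digit parser following the pattern of the patch above:
// validate the buffer length once, then consume a fixed number of bytes
// without rechecking inside the loop.
const char * parseSixDigits(const char * cur, const char * end, unsigned & result)
{
    if (end - cur < 6) // single up-front check replaces six per-iteration checks
        throw std::runtime_error("parseSixDigits requires size >= 6");

    result = 0;
    for (size_t i = 0; i < 6; ++i)
    {
        if (*cur < '0' || *cur > '9')
            throw std::runtime_error("not a digit");
        result = result * 10 + static_cast<unsigned>(*cur - '0');
        ++cur;
    }
    return cur; // points just past the sixth digit, like the patched function
}
```

Because the trip count is a compile-time constant, the hoisted check is both cheaper and easier for the compiler to optimize than a length test buried in each iteration.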
= "00:00"; - add_time_instruction(&Instruction::mysqlHHMM24, 5, val); + add_time_instruction(&Instruction::mysqlHHMM24, val); out_template += val; break; } @@ -1287,7 +1287,7 @@ public: case 's': { static constexpr std::string_view val = "00"; - add_time_instruction(&Instruction::mysqlSecond, 2, val); + add_time_instruction(&Instruction::mysqlSecond, val); out_template += val; break; } @@ -1296,7 +1296,7 @@ public: case 'S': { static constexpr std::string_view val = "00"; - add_time_instruction(&Instruction::mysqlSecond, 2, val); + add_time_instruction(&Instruction::mysqlSecond, val); out_template += val; break; } @@ -1305,7 +1305,7 @@ public: case 'T': { static constexpr std::string_view val = "00:00:00"; - add_time_instruction(&Instruction::mysqlISO8601Time, 8, val); + add_time_instruction(&Instruction::mysqlISO8601Time, val); out_template += val; break; } @@ -1314,7 +1314,7 @@ public: case 'h': { static constexpr std::string_view val = "12"; - add_time_instruction(&Instruction::mysqlHour12, 2, val); + add_time_instruction(&Instruction::mysqlHour12, val); out_template += val; break; } @@ -1323,7 +1323,7 @@ public: case 'H': { static constexpr std::string_view val = "00"; - add_time_instruction(&Instruction::mysqlHour24, 2, val); + add_time_instruction(&Instruction::mysqlHour24, val); out_template += val; break; } @@ -1332,7 +1332,7 @@ public: case 'i': { static constexpr std::string_view val = "00"; - add_time_instruction(&Instruction::mysqlMinute, 2, val); + add_time_instruction(&Instruction::mysqlMinute, val); out_template += val; break; } @@ -1341,7 +1341,7 @@ public: case 'I': { static constexpr std::string_view val = "12"; - add_time_instruction(&Instruction::mysqlHour12, 2, val); + add_time_instruction(&Instruction::mysqlHour12, val); out_template += val; break; } @@ -1350,7 +1350,7 @@ public: case 'k': { static constexpr std::string_view val = "00"; - add_time_instruction(&Instruction::mysqlHour24, 2, val); + add_time_instruction(&Instruction::mysqlHour24, val); out_template += val; break; } @@ -1359,7 +1359,7 @@ public: case 'l': { static constexpr std::string_view val = "12"; - add_time_instruction(&Instruction::mysqlHour12, 2, val); + add_time_instruction(&Instruction::mysqlHour12, val); out_template += val; break; } @@ -1367,7 +1367,7 @@ public: case 't': { static constexpr std::string_view val = "\t"; - add_extra_shift_or_literal_instruction(1, val); + add_extra_shift_or_literal_instruction(val); out_template += val; break; } @@ -1375,7 +1375,7 @@ public: case 'n': { static constexpr std::string_view val = "\n"; - add_extra_shift_or_literal_instruction(1, val); + add_extra_shift_or_literal_instruction(val); out_template += val; break; } @@ -1384,7 +1384,7 @@ public: case '%': { static constexpr std::string_view val = "%"; - add_extra_shift_or_literal_instruction(1, val); + add_extra_shift_or_literal_instruction(val); out_template += val; break; } @@ -1411,7 +1411,7 @@ public: else { /// Handle characters after last % - add_extra_shift_or_literal_instruction(end - pos, std::string_view(pos, end - pos)); + add_extra_shift_or_literal_instruction(std::string_view(pos, end - pos)); out_template += String(pos, end - pos); break; } From dd2364361c08bccab5f872e63a1e267827a7fe44 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov <68555560+aalexfvk@users.noreply.github.com> Date: Tue, 11 Apr 2023 10:45:30 +0300 Subject: [PATCH 265/277] Update src/Coordination/tests/gtest_coordination.cpp Co-authored-by: Antonio Andelic --- src/Coordination/tests/gtest_coordination.cpp | 2 +- 1 file 
From dd2364361c08bccab5f872e63a1e267827a7fe44 Mon Sep 17 00:00:00 2001
From: Aleksei Filatov <68555560+aalexfvk@users.noreply.github.com>
Date: Tue, 11 Apr 2023 10:45:30 +0300
Subject: [PATCH 265/277] Update src/Coordination/tests/gtest_coordination.cpp

Co-authored-by: Antonio Andelic
---
 src/Coordination/tests/gtest_coordination.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp
index 68e68ca1fa7..b1bea8ddf24 100644
--- a/src/Coordination/tests/gtest_coordination.cpp
+++ b/src/Coordination/tests/gtest_coordination.cpp
@@ -1672,7 +1672,7 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted)
     // commit all entries
     state_machine->commit(1, auth_entry->get_buf());
     state_machine->commit(2, create_entry->get_buf());
-    state_machine->commit(2, set_acl_entry->get_buf());
+    state_machine->commit(3, set_acl_entry->get_buf());
     const auto & uncommitted_state = state_machine->getStorage().uncommitted_state;
     auto node = uncommitted_state.getNode(node_path);
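The one-character fix matters because each appended log entry occupies its own index, and commits are expected to follow the log order; committing two different entries at the same index replays the wrong payload. A hypothetical helper (not part of the test) that makes the invariant explicit:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical wrapper around the pattern used in the test above: commit a
// batch of pre-appended entries at strictly increasing log indices, so a
// copy-pasted index can never be reused by accident.
template <typename StateMachine, typename EntryPtr>
uint64_t commitSequentially(StateMachine & state_machine, const std::vector<EntryPtr> & entries, uint64_t first_index)
{
    uint64_t index = first_index;
    for (const auto & entry : entries)
        state_machine.commit(index++, entry->get_buf()); // each entry gets a distinct index
    return index; // the next free log index
}
```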
Shiryaev" Date: Tue, 11 Apr 2023 12:40:04 +0200 Subject: [PATCH 267/277] Partially revert e0252db8d and fix pr-bugfix labeling --- tests/ci/run_check.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 8dc136e01f4..de56137b7dd 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -35,8 +35,9 @@ LABELS = { "pr-backward-incompatible": ["Backward Incompatible Change"], "pr-bugfix": [ "Bug Fix", - "Bug Fix (user-visible misbehaviour in official stable release)", - "Bug Fix (user-visible misbehavior in official stable release)", + "Bug Fix (user-visible misbehavior in an official stable release)", + "Bug Fix (user-visible misbehaviour in official stable or prestable release)", + "Bug Fix (user-visible misbehavior in official stable or prestable release)", ], "pr-build": [ "Build/Testing/Packaging Improvement", From 4605d71947077067c0fed62ae3e0cb2179c1ef52 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 11 Apr 2023 11:17:02 +0000 Subject: [PATCH 268/277] Fix build src/Interpreters/InterpreterInsertQuery.h --- src/Interpreters/InterpreterInsertQuery.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index f60d6567d74..5bff472270d 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { From 30659c5b48cf10adf936ade1f1a23c9187febacf Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 11 Apr 2023 11:46:53 +0000 Subject: [PATCH 269/277] Fix build ThreadGroupPtr --- src/Processors/Transforms/buildPushingToViewsChain.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index a2e7e39ff23..c7effa77d5b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace Poco { From 24453759719b15c8681e7f838a44e7e81b9d58ee Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 11 Apr 2023 14:11:24 +0200 Subject: [PATCH 270/277] Fix flaky test test_drop_replica_and_achieve_quorum --- tests/integration/test_quorum_inserts/test.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 779d1a69dcc..4dbd530dd17 100644 --- a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -144,25 +144,6 @@ def test_drop_replica_and_achieve_quorum(started_cluster): ) ) - print("Now we can insert some other data.") - zero.query( - "INSERT INTO test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')" - ) - - assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV( - zero.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a") - ) - assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV( - first.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a") - ) - assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV( - second.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a") - ) - - zero.query( - "DROP TABLE IF EXISTS test_drop_replica_and_achieve_quorum ON CLUSTER cluster" - ) - @pytest.mark.parametrize(("add_new_data"), [False, True]) def 
From 24453759719b15c8681e7f838a44e7e81b9d58ee Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Tue, 11 Apr 2023 14:11:24 +0200
Subject: [PATCH 270/277] Fix flaky test test_drop_replica_and_achieve_quorum

---
 tests/integration/test_quorum_inserts/test.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py
index 779d1a69dcc..4dbd530dd17 100644
--- a/tests/integration/test_quorum_inserts/test.py
+++ b/tests/integration/test_quorum_inserts/test.py
@@ -144,25 +144,6 @@ def test_drop_replica_and_achieve_quorum(started_cluster):
         )
     )
-    print("Now we can insert some other data.")
-    zero.query(
-        "INSERT INTO test_drop_replica_and_achieve_quorum(a,d) VALUES (2, '2012-02-02')"
-    )
-
-    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(
-        zero.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a")
-    )
-    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(
-        first.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a")
-    )
-    assert TSV("1\t2011-01-01\n2\t2012-02-02\n") == TSV(
-        second.query("SELECT * FROM test_drop_replica_and_achieve_quorum ORDER BY a")
-    )
-
-    zero.query(
-        "DROP TABLE IF EXISTS test_drop_replica_and_achieve_quorum ON CLUSTER cluster"
-    )
-
 @pytest.mark.parametrize(("add_new_data"), [False, True])
 def test_insert_quorum_with_drop_partition(started_cluster, add_new_data):

From c29aa0cd49e635b4b2f929c9519a9f4d055af57f Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Tue, 11 Apr 2023 15:34:33 +0200
Subject: [PATCH 271/277] Fail the build on a wrong changelog category

---
 tests/ci/run_check.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index de56137b7dd..b7dc78af4d7 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -31,6 +31,8 @@ SUBMODULE_CHANGED_LABEL = "submodule changed"
 # They are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there
 # updated accordingly
+# The following lists are append only, try to avoid editing them
+# They atill could be cleaned out after the decent time though.
 LABELS = {
     "pr-backward-incompatible": ["Backward Incompatible Change"],
     "pr-bugfix": [
@@ -190,6 +192,9 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
     ):
         return "", category
+    if category not in CATEGORY_TO_LABEL:
+        return f"Category '{category}' is not valid", ""
+
     if not entry:
         return f"Changelog entry required for category '{category}'", category

From c6907f2a327894480f93487f3a5e13c3d0613f71 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Tue, 11 Apr 2023 09:53:40 -0400
Subject: [PATCH 272/277] add Observability to spell list

---
 utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 8f72f07d7ec..4eb0e9dc42d 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -108,6 +108,7 @@ NULLIF
 NVME
 NYPD
 NuRaft
+Observability
 OLAP
 OLTP
 ObjectId

From cdceac662412e968096de800c35c47f650d2996a Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Tue, 11 Apr 2023 16:10:52 +0200
Subject: [PATCH 273/277] Reduce number of return statements

---
 tests/ci/run_check.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index b7dc78af4d7..4f38007cb03 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -132,6 +132,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
     category = ""
     entry = ""
+    description_error = ""
     i = 0
     while i < len(lines):
@@ -183,22 +184,19 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
         i += 1
     if not category:
-        return "Changelog category is empty", category
-
+        description_error = "Changelog category is empty"
     # Filter out the PR categories that are not for changelog.
-    if re.match(
+    elif re.match(
         r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)",
         category,
     ):
-        return "", category
+        pass  # to not check the rest of the conditions
+    elif category not in CATEGORY_TO_LABEL:
+        description_error, category = f"Category '{category}' is not valid", ""
+    elif not entry:
+        description_error = f"Changelog entry required for category '{category}'"
-    if category not in CATEGORY_TO_LABEL:
-        return f"Category '{category}' is not valid", ""
-
-    if not entry:
-        return f"Changelog entry required for category '{category}'", category
-
-    return "", category
+    return description_error, category
 if __name__ == "__main__":

From bf28be8837f7cbeb9883b943cd04aa74678620c1 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Tue, 11 Apr 2023 17:07:44 +0200
Subject: [PATCH 274/277] fix 02504_regexp_dictionary_table_source

---
 src/Dictionaries/RegExpTreeDictionary.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp
index 4db88631a2c..c56c611d02f 100644
--- a/src/Dictionaries/RegExpTreeDictionary.cpp
+++ b/src/Dictionaries/RegExpTreeDictionary.cpp
@@ -271,14 +271,16 @@ void RegExpTreeDictionary::initGraph()
     for (const auto & [id, value]: regex_nodes)
         if (value->parent_id == 0) // this is root node.
             initTopologyOrder(id, visited, topology_id);
+    /// If there is a cycle and all nodes have a parent, this condition will be met.
     if (topology_order.size() != regex_nodes.size())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "The topology order cannot match the number of regex nodes. This is likely a internal bug.");
+        throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "The regexp tree is cyclical. Please check your config.");
 }
 void RegExpTreeDictionary::initTopologyOrder(UInt64 node_idx, std::set & visited, UInt64 & topology_id)
 {
     visited.insert(node_idx);
     for (UInt64 child_idx : regex_nodes[node_idx]->children)
+        /// there is a cycle when dfs the graph.
         if (visited.contains(child_idx))
             throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "The regexp tree is cyclical. Please check your config.");
         else
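The comments added here describe two complementary cycle checks: a back-edge test while the DFS runs, and a final size comparison that catches cycles unreachable from any root (every node in such a cycle has a parent, so the DFS never enters it and those nodes end up without a topology number). A self-contained sketch of the same idea, with simplified graph types rather than the dictionary's actual node structures:

```cpp
#include <cstdint>
#include <map>
#include <set>
#include <stdexcept>
#include <vector>

using Graph = std::map<uint64_t, std::vector<uint64_t>>; // node id -> children

void visit(const Graph & graph, uint64_t node, std::set<uint64_t> & visited,
           uint64_t & next_order, std::map<uint64_t, uint64_t> & order)
{
    visited.insert(node);
    for (uint64_t child : graph.at(node))
    {
        /// A revisit during the DFS means a cycle (or a reconverging edge),
        /// mirroring the visited.contains(child_idx) check above.
        if (visited.contains(child))
            throw std::runtime_error("the tree is cyclical");
        visit(graph, child, visited, next_order, order);
    }
    order[node] = next_order++; // post-order number = topological position
}

std::map<uint64_t, uint64_t> topologyOrder(const Graph & graph, const std::vector<uint64_t> & roots)
{
    std::set<uint64_t> visited;
    std::map<uint64_t, uint64_t> order;
    uint64_t next_order = 0;
    for (uint64_t root : roots)
        visit(graph, root, visited, next_order, order);
    /// A cycle in which every node has a parent is never reached from a root,
    /// so it only shows up as nodes missing from the ordering.
    if (order.size() != graph.size())
        throw std::runtime_error("the tree is cyclical");
    return order;
}
```

Note how the follow-up patch also downgrades the size mismatch from a logical error back to a configuration error: a user-supplied cyclic config is the expected way to hit it.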
From 38e89b892a3b95e337441e83b872da3cb919ebe4 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 11 Apr 2023 20:31:35 +0000
Subject: [PATCH 275/277] Fix roundAge()

---
 src/Functions/roundAge.cpp                   | 3 +--
 tests/queries/0_stateless/00968_roundAge.sql | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp
index d2503bb6938..5c6f9b65279 100644
--- a/src/Functions/roundAge.cpp
+++ b/src/Functions/roundAge.cpp
@@ -15,8 +15,7 @@ struct RoundAgeImpl
     static inline ResultType apply(A x)
     {
-        return x < 1 ? 0
-            : (x < 18 ? 17
+        return (x < 18 ? 0
             : (x < 25 ? 18
             : (x < 35 ? 25
            : (x < 45 ? 35

diff --git a/tests/queries/0_stateless/00968_roundAge.sql b/tests/queries/0_stateless/00968_roundAge.sql
index c8e5a5579f2..f092dbf6560 100644
--- a/tests/queries/0_stateless/00968_roundAge.sql
+++ b/tests/queries/0_stateless/00968_roundAge.sql
@@ -1,7 +1,8 @@
 SELECT roundAge(0);
+SELECT roundAge(12);
 SELECT roundAge(18);
 SELECT roundAge(25);
 SELECT roundAge(35);
 SELECT roundAge(45);
 SELECT roundAge(55);
-SELECT roundAge(56);
\ No newline at end of file
+SELECT roundAge(56);

From 58d98fa21f2e8de2ada4414c42f65d8314c2d48b Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 12 Apr 2023 07:57:00 +0000
Subject: [PATCH 276/277] Revert "Fix roundAge()"

This reverts commit 38e89b892a3b95e337441e83b872da3cb919ebe4.

---
 src/Functions/roundAge.cpp                   | 3 ++-
 tests/queries/0_stateless/00968_roundAge.sql | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp
index 5c6f9b65279..d2503bb6938 100644
--- a/src/Functions/roundAge.cpp
+++ b/src/Functions/roundAge.cpp
@@ -15,7 +15,8 @@ struct RoundAgeImpl
     static inline ResultType apply(A x)
     {
-        return (x < 18 ? 0
+        return x < 1 ? 0
+            : (x < 18 ? 17
             : (x < 25 ? 18
             : (x < 35 ? 25
             : (x < 45 ? 35

diff --git a/tests/queries/0_stateless/00968_roundAge.sql b/tests/queries/0_stateless/00968_roundAge.sql
index f092dbf6560..c8e5a5579f2 100644
--- a/tests/queries/0_stateless/00968_roundAge.sql
+++ b/tests/queries/0_stateless/00968_roundAge.sql
@@ -1,8 +1,7 @@
 SELECT roundAge(0);
-SELECT roundAge(12);
 SELECT roundAge(18);
 SELECT roundAge(25);
 SELECT roundAge(35);
 SELECT roundAge(45);
 SELECT roundAge(55);
-SELECT roundAge(56);
+SELECT roundAge(56);
\ No newline at end of file

From 9c653197a4630b8a32b2e816ec1deb507c81a370 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 12 Apr 2023 08:05:46 +0000
Subject: [PATCH 277/277] Update roundAge() docs

---
 docs/en/sql-reference/functions/rounding-functions.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md
index 01ee720cfd3..40ff958e0ae 100644
--- a/docs/en/sql-reference/functions/rounding-functions.md
+++ b/docs/en/sql-reference/functions/rounding-functions.md
@@ -194,7 +194,14 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro
 ## roundAge(num)
 
-Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rounds the number down to a number from the set: 18, 25, 35, 45, 55.
+Accepts a number. If the number is
+- smaller than 1, it returns 0,
+- between 1 and 17, it returns 17,
+- between 18 and 24, it returns 18,
+- between 25 and 34, it returns 25,
+- between 35 and 44, it returns 35,
+- between 45 and 54, it returns 45,
+- larger than 55, it returns 55.
 
 ## roundDown(num, arr)
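For reference, the bucketing documented above can be expressed as a small standalone function. This is a sketch mirroring the kept (reverted-to) implementation in roundAge.cpp, not a drop-in replacement for it:

```cpp
#include <cstdint>
#include <iostream>

// Sketch of the age bucketing documented above: 0, 17, 18, 25, 35, 45, 55.
uint8_t roundAge(int64_t x)
{
    return x < 1  ? 0
         : x < 18 ? 17
         : x < 25 ? 18
         : x < 35 ? 25
         : x < 45 ? 35
         : x < 55 ? 45
         : 55;
}

int main()
{
    // Matches the cases exercised by 00968_roundAge.sql, including the
    // roundAge(12) probe that motivated the docs update.
    for (int64_t age : {0, 12, 18, 25, 35, 45, 55, 56})
        std::cout << age << " -> " << int(roundAge(age)) << '\n';
    // 0->0, 12->17, 18->18, 25->25, 35->35, 45->45, 55->55, 56->55
}
```

The revert in patch 276 shows why the docs, rather than the code, were changed: the `x < 1 ? 0 : x < 18 ? 17` behavior is intentional, and the original documentation simply failed to mention the 17 bucket.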