Merge branch 'master' into tighten-limits-functional-tests

2024-09-19 16:20:50 +00:00 · 2024-08-15 03:27:27 +02:00 · 2024-08-15 03:27:27 +02:00 · 96f74cdd22
commit 96f74cdd22
parent 10e58a5cbb cef5d80090
123 changed files with 4114 additions and 1904 deletions
--- a/.github/workflows/create_release.yml
+++ b/.github/workflows/create_release.yml
@ -129,9 +129,9 @@ jobs:
        if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
        shell: bash
        run: |
-          python3 ./tests/ci/create_release.py --set-progress-completed
          git reset --hard HEAD
          git checkout "$GITHUB_REF_NAME"
+          python3 ./tests/ci/create_release.py --set-progress-completed
      - name: Create GH Release
        if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
        shell: bash
--- a/base/base/cgroupsv2.cpp
+++ b/base/base/cgroupsv2.cpp
@ -27,27 +27,6 @@ bool cgroupsV2Enabled()
 #endif
 }

-bool cgroupsV2MemoryControllerEnabled()
-{
-#if defined(OS_LINUX)
-    chassert(cgroupsV2Enabled());
-    /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available
-    /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file
-    /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file.
-    fs::path cgroup_dir = cgroupV2PathOfProcess();
-    if (cgroup_dir.empty())
-        return false;
-    std::ifstream controllers_file(cgroup_dir / "cgroup.controllers");
-    if (!controllers_file.is_open())
-        return false;
-    std::string controllers;
-    std::getline(controllers_file, controllers);
-    return controllers.find("memory") != std::string::npos;
-#else
-    return false;
-#endif
-}
-
 fs::path cgroupV2PathOfProcess()
 {
 #if defined(OS_LINUX)
@ -71,3 +50,28 @@ fs::path cgroupV2PathOfProcess()
    return {};
 #endif
 }
+
+std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name)
+{
+#if defined(OS_LINUX)
+    if (!cgroupsV2Enabled())
+        return {};
+
+    fs::path current_cgroup = cgroupV2PathOfProcess();
+    if (current_cgroup.empty())
+        return {};
+
+    /// Return the bottom-most nested file. If there is no such file at the current
+    /// level, try again at the parent level as settings are inherited.
+    while (current_cgroup != default_cgroups_mount.parent_path())
+    {
+        const auto path = current_cgroup / file_name;
+        if (fs::exists(path))
+            return {current_cgroup};
+        current_cgroup = current_cgroup.parent_path();
+    }
+    return {};
+#else
+    return {};
+#endif
+}
--- a/base/base/cgroupsv2.h
+++ b/base/base/cgroupsv2.h
@ -1,6 +1,7 @@
 #pragma once

 #include <filesystem>
+#include <string_view>

 #if defined(OS_LINUX)
 /// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers).
@ -11,11 +12,11 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou
 /// Is cgroups v2 enabled on the system?
 bool cgroupsV2Enabled();

-/// Is the memory controller of cgroups v2 enabled on the system?
-/// Assumes that cgroupsV2Enabled() is enabled.
-bool cgroupsV2MemoryControllerEnabled();
-
 /// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup.
 /// Returns an empty path the cgroup cannot be determined.
 /// Assumes that cgroupsV2Enabled() is enabled.
 std::filesystem::path cgroupV2PathOfProcess();
+
+/// Returns the most nested cgroup dir containing the specified file.
+/// If cgroups v2 is not enabled - returns an empty optional.
+std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name);
--- a/base/base/getMemoryAmount.cpp
+++ b/base/base/getMemoryAmount.cpp
@ -19,9 +19,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
    if (!cgroupsV2Enabled())
        return {};

-    if (!cgroupsV2MemoryControllerEnabled())
-        return {};
-
    std::filesystem::path current_cgroup = cgroupV2PathOfProcess();
    if (current_cgroup.empty())
        return {};
--- a/cmake/autogenerated_versions.txt
+++ b/cmake/autogenerated_versions.txt
@ -2,11 +2,11 @@

 # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
 # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54489)
+SET(VERSION_REVISION 54490)
 SET(VERSION_MAJOR 24)
-SET(VERSION_MINOR 8)
+SET(VERSION_MINOR 9)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
-SET(VERSION_DESCRIBE v24.8.1.1-testing)
-SET(VERSION_STRING 24.8.1.1)
+SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608)
+SET(VERSION_DESCRIBE v24.9.1.1-testing)
+SET(VERSION_STRING 24.9.1.1)
 # end of autochange
--- a/contrib/libunwind
+++ b/contrib/libunwind
@ -1 +1 @@
-Subproject commit a89d904befea07814628c6ce0b44083c4e149c62
+Subproject commit 601db0b0e03018c01710470a37703b618f9cf08b
--- a/docs/changelogs/v24.3.7.30-lts.md
+++ b/docs/changelogs/v24.3.7.30-lts.md
@ -0,0 +1,29 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d)
+
+#### Improvement
+* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@ -75,6 +75,8 @@ public:
            const String & default_database_,
            const String & user_,
            const String & password_,
+            const String & proto_send_chunked_,
+            const String & proto_recv_chunked_,
            const String & quota_key_,
            const String & stage,
            bool randomize_,
@ -128,7 +130,9 @@ public:
            connections.emplace_back(std::make_unique<ConnectionPool>(
                concurrency,
                cur_host, cur_port,
-                default_database_, user_, password_, quota_key_,
+                default_database_, user_, password_,
+                proto_send_chunked_, proto_recv_chunked_,
+                quota_key_,
                /* cluster_= */ "",
                /* cluster_secret_= */ "",
                /* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
@ -662,6 +666,50 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)

        Strings hosts = options.count("host") ? options["host"].as<Strings>() : Strings({"localhost"});

+        String proto_send_chunked {"notchunked"};
+        String proto_recv_chunked {"notchunked"};
+
+        if (options.count("proto_caps"))
+        {
+            std::string proto_caps_str = options["proto_caps"].as<std::string>();
+
+            std::vector<std::string_view> proto_caps;
+            splitInto<','>(proto_caps, proto_caps_str);
+
+            for (auto cap_str : proto_caps)
+            {
+                std::string direction;
+
+                if (cap_str.starts_with("send_"))
+                {
+                    direction = "send";
+                    cap_str = cap_str.substr(std::string_view("send_").size());
+                }
+                else if (cap_str.starts_with("recv_"))
+                {
+                    direction = "recv";
+                    cap_str = cap_str.substr(std::string_view("recv_").size());
+                }
+
+                if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional")
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str);
+
+                if (direction.empty())
+                {
+                    proto_send_chunked = cap_str;
+                    proto_recv_chunked = cap_str;
+                }
+                else
+                {
+                    if (direction == "send")
+                        proto_send_chunked = cap_str;
+                    else
+                        proto_recv_chunked = cap_str;
+                }
+            }
+        }
+
+
        Benchmark benchmark(
            options["concurrency"].as<unsigned>(),
            options["delay"].as<double>(),
@ -673,6 +721,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
            options["database"].as<std::string>(),
            options["user"].as<std::string>(),
            options["password"].as<std::string>(),
+            proto_send_chunked,
+            proto_recv_chunked,
            options["quota_key"].as<std::string>(),
            options["stage"].as<std::string>(),
            options.count("randomize"),
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@ -38,6 +38,21 @@
        <production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
    </prompt_by_server_display_name>

+    <!-- Chunked capabilities for native protocol by client.
+         Can be enabled separately for send and receive channels.
+         Supported modes:
+         - chunked - client will only work with server supporting chunked protocol;
+         - chunked_optional - client prefer server to enable chunked protocol, but can switch to notchunked if server does not support this;
+         - notchunked - client will only work with server supporting notchunked protocol (current default);
+         - notchunked_optional - client prefer server notchunked protocol, but can switch to chunked if server does not support this.
+     -->
+    <!--
+    <proto_caps>
+        <send>chunked_optional</send>
+        <recv>chunked_optional</recv>
+    </proto_caps>
+    -->
+
    <!--
        Settings adjustable via command-line parameters
        can take their defaults from that config file, see examples:
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -150,6 +150,21 @@
    -->
    <tcp_port>9000</tcp_port>

+    <!-- Chunked capabilities for native protocol by server.
+         Can be enabled separately for send and receive channels.
+         Supported modes:
+         - chunked - server requires from client to have chunked enabled;
+         - chunked_optional - server supports both chunked and notchunked protocol;
+         - notchunked - server requires from client notchunked protocol (current default);
+         - notchunked_optional - server supports both chunked and notchunked protocol.
+     -->
+    <!--
+    <proto_caps>
+        <send>notchunked_optional</send>
+        <recv>notchunked_optional</recv>
+    </proto_caps>
+    -->
+
    <!-- Compatibility with MySQL protocol.
         ClickHouse will pretend to be MySQL for applications connecting to this port.
    -->
--- a/src/Client/ClientApplicationBase.cpp
+++ b/src/Client/ClientApplicationBase.cpp
@ -158,6 +158,8 @@ void ClientApplicationBase::init(int argc, char ** argv)

        ("config-file,C", po::value<std::string>(), "config-file path")

+        ("proto_caps", po::value<std::string>(), "enable/disable chunked protocol: chunked_optional, notchunked, notchunked_optional, send_chunked, send_chunked_optional, send_notchunked, send_notchunked_optional, recv_chunked, recv_chunked_optional, recv_notchunked, recv_notchunked_optional")
+
        ("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(Query. Can be specified multiple times (--query "SELECT 1" --query "SELECT 2") or once with multiple comma-separated queries (--query "SELECT 1; SELECT 2;"). In the latter case, INSERT queries with non-VALUE format must be separated by empty lines.)")
        ("queries-file", po::value<std::vector<std::string>>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
        ("multiquery,n", "Obsolete, does nothing")
@ -337,6 +339,41 @@ void ClientApplicationBase::init(int argc, char ** argv)
    if (options.count("server_logs_file"))
        server_logs_file = options["server_logs_file"].as<std::string>();

+    if (options.count("proto_caps"))
+    {
+        std::string proto_caps_str = options["proto_caps"].as<std::string>();
+
+        std::vector<std::string_view> proto_caps;
+        splitInto<','>(proto_caps, proto_caps_str);
+
+        for (auto cap_str : proto_caps)
+        {
+            std::string direction;
+
+            if (cap_str.starts_with("send_"))
+            {
+                direction = "send";
+                cap_str = cap_str.substr(std::string_view("send_").size());
+            }
+            else if (cap_str.starts_with("recv_"))
+            {
+                direction = "recv";
+                cap_str = cap_str.substr(std::string_view("recv_").size());
+            }
+
+            if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional")
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str);
+
+            if (direction.empty())
+            {
+                config().setString("proto_caps.send", std::string(cap_str));
+                config().setString("proto_caps.recv", std::string(cap_str));
+            }
+            else
+                config().setString("proto_caps." + direction, std::string(cap_str));
+        }
+    }
+
    query_processing_stage = QueryProcessingStage::fromString(options["stage"].as<std::string>());
    query_kind = parseQueryKind(options["query_kind"].as<std::string>());
    profile_events.print = options.count("print-profile-events");
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -73,9 +73,11 @@
 #include <limits>
 #include <map>
 #include <memory>
+#include <string_view>
 #include <unordered_map>

 #include <Common/config_version.h>
+#include <base/find_symbols.h>
 #include "config.h"
 #include <IO/ReadHelpers.h>
 #include <Processors/Formats/Impl/ValuesBlockInputFormat.h>
@ -914,6 +916,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query)
    }
    catch (Exception & e)
    {
+        if (server_exception)
+            server_exception->rethrow();
        if (!is_interactive)
            e.addMessage("(in query: {})", full_query);
        throw;
@ -1032,19 +1036,28 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
            query_interrupt_handler.start(signals_before_stop);
            SCOPE_EXIT({ query_interrupt_handler.stop(); });

-            connection->sendQuery(
-                connection_parameters.timeouts,
-                query,
-                query_parameters,
-                client_context->getCurrentQueryId(),
-                query_processing_stage,
-                &client_context->getSettingsRef(),
-                &client_context->getClientInfo(),
-                true,
-                [&](const Progress & progress) { onProgress(progress); });
+            try {
+                connection->sendQuery(
+                    connection_parameters.timeouts,
+                    query,
+                    query_parameters,
+                    client_context->getCurrentQueryId(),
+                    query_processing_stage,
+                    &client_context->getSettingsRef(),
+                    &client_context->getClientInfo(),
+                    true,
+                    [&](const Progress & progress) { onProgress(progress); });
+
+                if (send_external_tables)
+                    sendExternalTables(parsed_query);
+            }
+            catch (const NetException &)
+            {
+                // We still want to attempt to process whatever we already received or can receive (socket receive buffer can be not empty)
+                receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel);
+                throw;
+            }

-            if (send_external_tables)
-                sendExternalTables(parsed_query);
            receiveResult(parsed_query, signals_before_stop, settings.partial_result_on_first_cancel);

            break;
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@ -5,8 +5,6 @@
 #include <Core/Settings.h>
 #include <Compression/CompressedReadBuffer.h>
 #include <Compression/CompressedWriteBuffer.h>
-#include <IO/ReadBufferFromPocoSocket.h>
-#include <IO/WriteBufferFromPocoSocket.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
 #include <IO/copyData.h>
@ -85,6 +83,7 @@ Connection::~Connection()
 Connection::Connection(const String & host_, UInt16 port_,
    const String & default_database_,
    const String & user_, const String & password_,
+    const String & proto_send_chunked_, const String & proto_recv_chunked_,
    [[maybe_unused]] const SSHKey & ssh_private_key_,
    const String & jwt_,
    const String & quota_key_,
@ -95,6 +94,7 @@ Connection::Connection(const String & host_, UInt16 port_,
    Protocol::Secure secure_)
    : host(host_), port(port_), default_database(default_database_)
    , user(user_), password(password_)
+    , proto_send_chunked(proto_send_chunked_), proto_recv_chunked(proto_recv_chunked_)
 #if USE_SSH
    , ssh_private_key(ssh_private_key_)
 #endif
@ -211,10 +211,10 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
                , tcp_keep_alive_timeout_in_sec);
        }

-        in = std::make_shared<ReadBufferFromPocoSocket>(*socket);
+        in = std::make_shared<ReadBufferFromPocoSocketChunked>(*socket);
        in->setAsyncCallback(async_callback);

-        out = std::make_shared<WriteBufferFromPocoSocket>(*socket);
+        out = std::make_shared<WriteBufferFromPocoSocketChunked>(*socket);
        out->setAsyncCallback(async_callback);
        connected = true;
        setDescription();
@ -222,9 +222,61 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
        sendHello();
        receiveHello(timeouts.handshake_timeout);

+        if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS)
+        {
+            /// Client side of chunked protocol negotiation.
+            /// Server advertises its protocol capabilities (separate for send and receive channels) by sending
+            /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional.
+            /// Not optional types are strict meaning that server only supports this type, optional means that
+            /// server prefer this type but capable to work in opposite.
+            /// Client selects which type it is going to communicate based on the settings from config or arguments,
+            /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake.
+            /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example
+            /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case
+            /// client should interrupt this connection. However if client continues with incompatible protocol type request, server
+            /// will send appropriate exception and disconnect client.
+
+            auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction)
+            {
+                bool chunked_srv = chunked_srv_str.starts_with("chunked");
+                bool optional_srv = chunked_srv_str.ends_with("_optional");
+                bool chunked_cl = chunked_cl_str.starts_with("chunked");
+                bool optional_cl = chunked_cl_str.ends_with("_optional");
+
+                if (optional_srv)
+                    return chunked_cl;
+                if (optional_cl)
+                    return chunked_srv;
+                if (chunked_cl != chunked_srv)
+                    throw NetException(
+                        ErrorCodes::NETWORK_ERROR,
+                        "Incompatible protocol: {} set to {}, server requires {}",
+                        direction,
+                        chunked_cl ? "chunked" : "notchunked",
+                        chunked_srv ? "chunked" : "notchunked");
+
+                return chunked_srv;
+            };
+
+            proto_send_chunked = is_chunked(proto_recv_chunked_srv, proto_send_chunked, "send") ? "chunked" : "notchunked";
+            proto_recv_chunked = is_chunked(proto_send_chunked_srv, proto_recv_chunked, "recv") ? "chunked" : "notchunked";
+        }
+        else
+        {
+            if (proto_send_chunked == "chunked" || proto_recv_chunked == "chunked")
+                throw NetException(
+                        ErrorCodes::NETWORK_ERROR,
+                        "Incompatible protocol: server's version is too old and doesn't support chunked protocol while client settings require it.");
+        }
+
        if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM)
            sendAddendum();

+        if (proto_send_chunked == "chunked")
+            out->enableChunked();
+        if (proto_recv_chunked == "chunked")
+            in->enableChunked();
+
        LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.",
            server_name, server_version_major, server_version_minor, server_version_patch);
    }
@ -393,6 +445,13 @@ void Connection::sendAddendum()
 {
    if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_QUOTA_KEY)
        writeStringBinary(quota_key, *out);
+
+    if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS)
+    {
+        writeStringBinary(proto_send_chunked, *out);
+        writeStringBinary(proto_recv_chunked, *out);
+    }
+
    out->next();
 }

@ -472,6 +531,12 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
        else
            server_version_patch = server_revision;

+        if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS)
+        {
+            readStringBinary(proto_send_chunked_srv, *in);
+            readStringBinary(proto_recv_chunked_srv, *in);
+        }
+
        if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES)
        {
            UInt64 rules_size;
@ -611,6 +676,7 @@ bool Connection::ping(const ConnectionTimeouts & timeouts)

        UInt64 pong = 0;
        writeVarUInt(Protocol::Client::Ping, *out);
+        out->finishChunk();
        out->next();

        if (in->eof())
@ -660,6 +726,7 @@ TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & time

    writeVarUInt(Protocol::Client::TablesStatusRequest, *out);
    request.write(*out, server_revision);
+    out->finishChunk();
    out->next();

    UInt64 response_type = 0;
@ -813,6 +880,8 @@ void Connection::sendQuery(
    block_profile_events_in.reset();
    block_out.reset();

+    out->finishChunk();
+
    /// Send empty block which means end of data.
    if (!with_pending_data)
    {
@ -829,6 +898,7 @@ void Connection::sendCancel()
        return;

    writeVarUInt(Protocol::Client::Cancel, *out);
+    out->finishChunk();
    out->next();
 }

@ -854,7 +924,10 @@ void Connection::sendData(const Block & block, const String & name, bool scalar)
    size_t prev_bytes = out->count();

    block_out->write(block);
-    maybe_compressed_out->next();
+    if (maybe_compressed_out != out)
+        maybe_compressed_out->next();
+    if (!block)
+        out->finishChunk();
    out->next();

    if (throttler)
@ -865,6 +938,7 @@ void Connection::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
 {
    writeVarUInt(Protocol::Client::IgnoredPartUUIDs, *out);
    writeVectorBinary(uuids, *out);
+    out->finishChunk();
    out->next();
 }

@ -874,6 +948,7 @@ void Connection::sendReadTaskResponse(const String & response)
    writeVarUInt(Protocol::Client::ReadTaskResponse, *out);
    writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out);
    writeStringBinary(response, *out);
+    out->finishChunk();
    out->next();
 }

@ -882,6 +957,7 @@ void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & resp
 {
    writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out);
    response.serialize(*out);
+    out->finishChunk();
    out->next();
 }

@ -899,6 +975,8 @@ void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String
        copyData(input, *out);
    else
        copyData(input, *out, size);
+
+    out->finishChunk();
    out->next();
 }

@ -927,6 +1005,8 @@ void Connection::sendScalarsData(Scalars & data)
        sendData(elem.second, elem.first, true /* scalar */);
    }

+    out->finishChunk();
+
    out_bytes = out->count() - out_bytes;
    maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
    double elapsed = watch.elapsedSeconds();
@ -1069,13 +1149,13 @@ std::optional<Poco::Net::SocketAddress> Connection::getResolvedAddress() const

 bool Connection::poll(size_t timeout_microseconds)
 {
-    return static_cast<ReadBufferFromPocoSocket &>(*in).poll(timeout_microseconds);
+    return in->poll(timeout_microseconds);
 }


 bool Connection::hasReadPendingData() const
 {
-    return last_input_packet_type.has_value() || static_cast<const ReadBufferFromPocoSocket &>(*in).hasPendingData();
+    return last_input_packet_type.has_value() || in->hasBufferedData();
 }


@ -1349,6 +1429,8 @@ ServerConnectionPtr Connection::createConnection(const ConnectionParameters & pa
        parameters.default_database,
        parameters.user,
        parameters.password,
+        parameters.proto_send_chunked,
+        parameters.proto_recv_chunked,
        parameters.ssh_private_key,
        parameters.jwt,
        parameters.quota_key,
--- a/src/Client/Connection.h
+++ b/src/Client/Connection.h
@ -8,8 +8,8 @@
 #include <Core/Defines.h>


-#include <IO/ReadBufferFromPocoSocket.h>
-#include <IO/WriteBufferFromPocoSocket.h>
+#include <IO/ReadBufferFromPocoSocketChunked.h>
+#include <IO/WriteBufferFromPocoSocketChunked.h>

 #include <Interpreters/TablesStatus.h>
 #include <Interpreters/Context_fwd.h>
@ -52,6 +52,7 @@ public:
    Connection(const String & host_, UInt16 port_,
        const String & default_database_,
        const String & user_, const String & password_,
+        const String & proto_send_chunked_, const String & proto_recv_chunked_,
        const SSHKey & ssh_private_key_,
        const String & jwt_,
        const String & quota_key_,
@ -170,6 +171,10 @@ private:
    String default_database;
    String user;
    String password;
+    String proto_send_chunked;
+    String proto_recv_chunked;
+    String proto_send_chunked_srv;
+    String proto_recv_chunked_srv;
 #if USE_SSH
    SSHKey ssh_private_key;
 #endif
@ -209,8 +214,8 @@ private:
    String server_display_name;

    std::unique_ptr<Poco::Net::StreamSocket> socket;
-    std::shared_ptr<ReadBufferFromPocoSocket> in;
-    std::shared_ptr<WriteBufferFromPocoSocket> out;
+    std::shared_ptr<ReadBufferFromPocoSocketChunked> in;
+    std::shared_ptr<WriteBufferFromPocoSocketChunked> out;
    std::optional<UInt64> last_input_packet_type;

    String query_id;
--- a/src/Client/ConnectionParameters.cpp
+++ b/src/Client/ConnectionParameters.cpp
@ -107,6 +107,9 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
        }
    }

+    proto_send_chunked = config.getString("proto_caps.send", "notchunked");
+    proto_recv_chunked = config.getString("proto_caps.recv", "notchunked");
+
    quota_key = config.getString("quota_key", "");

    /// By default compression is disabled if address looks like localhost.
--- a/src/Client/ConnectionParameters.h
+++ b/src/Client/ConnectionParameters.h
@ -20,6 +20,8 @@ struct ConnectionParameters
    std::string default_database;
    std::string user;
    std::string password;
+    std::string proto_send_chunked = "notchunked";
+    std::string proto_recv_chunked = "notchunked";
    std::string quota_key;
    SSHKey ssh_private_key;
    std::string jwt;
--- a/src/Client/ConnectionPool.cpp
+++ b/src/Client/ConnectionPool.cpp
@ -13,6 +13,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
    String default_database,
    String user,
    String password,
+    String proto_send_chunked,
+    String proto_recv_chunked,
    String quota_key,
    String cluster,
    String cluster_secret,
@ -22,7 +24,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
    Priority priority)
 {
    Key key{
-        max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority};
+        max_connections, host, port, default_database, user, password, proto_send_chunked, proto_recv_chunked, quota_key, cluster, cluster_secret, client_name, compression, secure, priority};

    std::lock_guard lock(mutex);
    auto [it, inserted] = pools.emplace(key, ConnectionPoolPtr{});
@ -39,6 +41,8 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
            default_database,
            user,
            password,
+            proto_send_chunked,
+            proto_recv_chunked,
            quota_key,
            cluster,
            cluster_secret,
--- a/src/Client/ConnectionPool.h
+++ b/src/Client/ConnectionPool.h
@ -73,6 +73,8 @@ public:
        const String & default_database_,
        const String & user_,
        const String & password_,
+        const String & proto_send_chunked_,
+        const String & proto_recv_chunked_,
        const String & quota_key_,
        const String & cluster_,
        const String & cluster_secret_,
@ -85,6 +87,8 @@ public:
        , default_database(default_database_)
        , user(user_)
        , password(password_)
+        , proto_send_chunked(proto_send_chunked_)
+        , proto_recv_chunked(proto_recv_chunked_)
        , quota_key(quota_key_)
        , cluster(cluster_)
        , cluster_secret(cluster_secret_)
@ -116,7 +120,9 @@ protected:
    {
        return std::make_shared<Connection>(
            host, port,
-            default_database, user, password, SSHKey(), /*jwt*/ "", quota_key,
+            default_database, user, password,
+            proto_send_chunked, proto_recv_chunked,
+            SSHKey(), /*jwt*/ "", quota_key,
            cluster, cluster_secret,
            client_name, compression, secure);
    }
@ -125,6 +131,8 @@ private:
    String default_database;
    String user;
    String password;
+    String proto_send_chunked;
+    String proto_recv_chunked;
    String quota_key;

    /// For inter-server authorization
@ -150,6 +158,8 @@ public:
        String default_database;
        String user;
        String password;
+        String proto_send_chunked;
+        String proto_recv_chunked;
        String quota_key;
        String cluster;
        String cluster_secret;
@ -173,6 +183,8 @@ public:
        String default_database,
        String user,
        String password,
+        String proto_send_chunked,
+        String proto_recv_chunked,
        String quota_key,
        String cluster,
        String cluster_secret,
@ -190,6 +202,7 @@ inline bool operator==(const ConnectionPoolFactory::Key & lhs, const ConnectionP
 {
    return lhs.max_connections == rhs.max_connections && lhs.host == rhs.host && lhs.port == rhs.port
        && lhs.default_database == rhs.default_database && lhs.user == rhs.user && lhs.password == rhs.password
+        && lhs.proto_send_chunked == rhs.proto_send_chunked && lhs.proto_recv_chunked == rhs.proto_recv_chunked
        && lhs.quota_key == rhs.quota_key
        && lhs.cluster == rhs.cluster && lhs.cluster_secret == rhs.cluster_secret && lhs.client_name == rhs.client_name
        && lhs.compression == rhs.compression && lhs.secure == rhs.secure && lhs.priority == rhs.priority;
--- a/src/Common/AsynchronousMetrics.cpp
+++ b/src/Common/AsynchronousMetrics.cpp
@ -1,18 +1,24 @@
-#include <Common/formatReadable.h>
 #include <Common/AsynchronousMetrics.h>
-#include <Common/Exception.h>
-#include <Common/setThreadName.h>
-#include <Common/CurrentMetrics.h>
-#include <Common/filesystemHelpers.h>
-#include <Common/logger_useful.h>
-#include <IO/UncompressedCache.h>
+
 #include <IO/MMappedFileCache.h>
 #include <IO/ReadHelpers.h>
+#include <IO/UncompressedCache.h>
+#include <base/cgroupsv2.h>
 #include <base/errnoToString.h>
 #include <base/find_symbols.h>
 #include <base/getPageSize.h>
 #include <sys/resource.h>
+#include <Common/CurrentMetrics.h>
+#include <Common/Exception.h>
+#include <Common/filesystemHelpers.h>
+#include <Common/formatReadable.h>
+#include <Common/logger_useful.h>
+#include <Common/setThreadName.h>
+
+#include <boost/locale/date_time_facet.hpp>
+
 #include <chrono>
+#include <string_view>

 #include "config.h"

@ -52,6 +58,12 @@ static std::unique_ptr<ReadBufferFromFilePRead> openFileIfExists(const std::stri
    return {};
 }

+static void openCgroupv2MetricFile(const std::string & filename, std::optional<ReadBufferFromFilePRead> & out)
+{
+    if (auto path = getCgroupsV2PathContainingFile(filename))
+        openFileIfExists((path.value() + filename).c_str(), out);
+};
+
 #endif


@ -63,21 +75,15 @@ AsynchronousMetrics::AsynchronousMetrics(
    , protocol_server_metrics_func(protocol_server_metrics_func_)
 {
 #if defined(OS_LINUX)
-    openFileIfExists("/proc/meminfo", meminfo);
-    openFileIfExists("/proc/loadavg", loadavg);
-    openFileIfExists("/proc/stat", proc_stat);
    openFileIfExists("/proc/cpuinfo", cpuinfo);
    openFileIfExists("/proc/sys/fs/file-nr", file_nr);
-    openFileIfExists("/proc/uptime", uptime);
    openFileIfExists("/proc/net/dev", net_dev);

    /// CGroups v2
-    openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes);
-    if (cgroupmem_limit_in_bytes)
-    {
-        openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes);
-    }
-    openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max);
+    openCgroupv2MetricFile("memory.max", cgroupmem_limit_in_bytes);
+    openCgroupv2MetricFile("memory.current", cgroupmem_usage_in_bytes);
+    openCgroupv2MetricFile("cpu.max", cgroupcpu_max);
+    openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat);

    /// CGroups v1
    if (!cgroupmem_limit_in_bytes)
@ -90,6 +96,21 @@ AsynchronousMetrics::AsynchronousMetrics(
        openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period);
        openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
    }
+    if (!cgroupcpu_stat)
+        openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat);
+
+    if (!cgroupcpu_stat && !cgroupcpuacct_stat)
+    {
+        /// The following metrics are not cgroup-aware and we've found cgroup-specific metric files for the similar metrics,
+        /// so we're better not reporting them at all to avoid confusion
+        openFileIfExists("/proc/loadavg", loadavg);
+        openFileIfExists("/proc/stat", proc_stat);
+        openFileIfExists("/proc/uptime", uptime);
+    }
+
+    /// The same story for memory metrics
+    if (!cgroupmem_limit_in_bytes)
+        openFileIfExists("/proc/meminfo", meminfo);

    openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count);
    openFileIfExists("/proc/self/maps", vm_maps);
@ -570,6 +591,151 @@ AsynchronousMetrics::NetworkInterfaceStatValues::operator-(const AsynchronousMet
 #endif


+#if defined(OS_LINUX)
+void AsynchronousMetrics::applyCPUMetricsUpdate(
+    AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier)
+{
+    new_values["OSUserTime" + cpu_suffix]
+        = {delta_values.user * multiplier,
+           "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the "
+           "host machine, not just clickhouse-server."
+           " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline "
+           "stalls, branch mispredictions, running another SMT core)."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSNiceTime" + cpu_suffix]
+        = {delta_values.nice * multiplier,
+           "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all "
+           "the processes on the host machine, not just clickhouse-server."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSSystemTime" + cpu_suffix]
+        = {delta_values.system * multiplier,
+           "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the "
+           "processes on the host machine, not just clickhouse-server."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSIdleTime" + cpu_suffix]
+        = {delta_values.idle * multiplier,
+           "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This "
+           "is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
+           " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline "
+           "stalls, branch mispredictions, running another SMT core)."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSIOWaitTime" + cpu_suffix]
+        = {delta_values.iowait * multiplier,
+           "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as "
+           "the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just "
+           "clickhouse-server."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSIrqTime" + cpu_suffix]
+        = {delta_values.irq * multiplier,
+           "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the "
+           "processes on the host machine, not just clickhouse-server."
+           " A high number of this metric may indicate hardware misconfiguration or a very high network load."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSSoftIrqTime" + cpu_suffix]
+        = {delta_values.softirq * multiplier,
+           "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the "
+           "processes on the host machine, not just clickhouse-server."
+           " A high number of this metric may indicate inefficient software running on the system."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSStealTime" + cpu_suffix]
+        = {delta_values.steal * multiplier,
+           "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide "
+           "metric, it includes all the processes on the host machine, not just clickhouse-server."
+           " Not every virtualized environments present this metric, and most of them don't."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSGuestTime" + cpu_suffix]
+        = {delta_values.guest * multiplier,
+           "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man "
+           "procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
+           " This metric is irrelevant for ClickHouse, but still exists for completeness."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+    new_values["OSGuestNiceTime" + cpu_suffix]
+        = {delta_values.guest_nice * multiplier,
+           "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest "
+           "was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host "
+           "machine, not just clickhouse-server."
+           " This metric is irrelevant for ClickHouse, but still exists for completeness."
+           " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across "
+           "them [0..num cores]."};
+}
+
+void AsynchronousMetrics::applyNormalizedCPUMetricsUpdate(
+    AsynchronousMetricValues & new_values, double num_cpus_to_normalize, const ProcStatValuesCPU & delta_values_all_cpus, double multiplier)
+{
+    chassert(num_cpus_to_normalize);
+
+    new_values["OSUserTimeNormalized"]
+        = {delta_values_all_cpus.user * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSNiceTimeNormalized"]
+        = {delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSSystemTimeNormalized"]
+        = {delta_values_all_cpus.system * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSIdleTimeNormalized"]
+        = {delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSIOWaitTimeNormalized"]
+        = {delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSIrqTimeNormalized"]
+        = {delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of "
+           "the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSSoftIrqTimeNormalized"]
+        = {delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval "
+           "regardless of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSStealTimeNormalized"]
+        = {delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSGuestTimeNormalized"]
+        = {delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless "
+           "of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+    new_values["OSGuestNiceTimeNormalized"]
+        = {delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize,
+           "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval "
+           "regardless of the number of cores."
+           " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is "
+           "non-uniform, and still get the average resource utilization metric."};
+}
+#endif
+
 void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
 {
    Stopwatch watch;
@ -831,7 +997,68 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
        new_values["CGroupMaxCPU"] = { max_cpu_cgroups, "The maximum number of CPU cores according to CGroups."};
    }

-    if (proc_stat)
+    if (cgroupcpu_stat || cgroupcpuacct_stat)
+    {
+        try
+        {
+            ReadBufferFromFilePRead & in = cgroupcpu_stat ? *cgroupcpu_stat : *cgroupcpuacct_stat;
+            ProcStatValuesCPU current_values{};
+
+            /// We re-read the file from the beginning each time
+            in.rewind();
+
+            while (!in.eof())
+            {
+                String name;
+                readStringUntilWhitespace(name, in);
+                skipWhitespaceIfAny(in);
+
+                /// `user_usec` for cgroup v2 and `user` for cgroup v1
+                if (name.starts_with("user"))
+                {
+                    readText(current_values.user, in);
+                    skipToNextLineOrEOF(in);
+                }
+                /// `system_usec` for cgroup v2 and `system` for cgroup v1
+                else if (name.starts_with("system"))
+                {
+                    readText(current_values.system, in);
+                    skipToNextLineOrEOF(in);
+                }
+                else
+                    skipToNextLineOrEOF(in);
+            }
+
+            if (!first_run)
+            {
+                auto get_clock_ticks = [&]()
+                {
+                    if (auto hz = sysconf(_SC_CLK_TCK); hz != -1)
+                        return hz;
+                    else
+                        throw ErrnoException(ErrorCodes::CANNOT_SYSCONF, "Cannot call 'sysconf' to obtain system HZ");
+                };
+                const auto cgroup_version_specific_divisor = cgroupcpu_stat ? 1e6 : get_clock_ticks();
+                const double multiplier = 1.0 / cgroup_version_specific_divisor
+                    / (std::chrono::duration_cast<std::chrono::nanoseconds>(time_since_previous_update).count() / 1e9);
+
+                const ProcStatValuesCPU delta_values = current_values - proc_stat_values_all_cpus;
+                applyCPUMetricsUpdate(new_values, /*cpu_suffix=*/"", delta_values, multiplier);
+                if (max_cpu_cgroups > 0)
+                    applyNormalizedCPUMetricsUpdate(new_values, max_cpu_cgroups, delta_values, multiplier);
+            }
+
+            proc_stat_values_all_cpus = current_values;
+        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+            openCgroupv2MetricFile("cpu.stat", cgroupcpu_stat);
+            if (!cgroupcpu_stat)
+                openFileIfExists("/sys/fs/cgroup/cpuacct/cpuacct.stat", cgroupcpuacct_stat);
+        }
+    }
+    else if (proc_stat)
    {
        try
        {
@ -886,43 +1113,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
                        else
                            delta_values_all_cpus = delta_values;

-                        new_values["OSUserTime" + cpu_suffix] = { delta_values.user * multiplier,
-                            "The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSNiceTime" + cpu_suffix] = { delta_values.nice * multiplier,
-                            "The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSSystemTime" + cpu_suffix] = { delta_values.system * multiplier,
-                            "The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSIdleTime" + cpu_suffix] = { delta_values.idle * multiplier,
-                            "The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSIOWaitTime" + cpu_suffix] = { delta_values.iowait * multiplier,
-                            "The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSIrqTime" + cpu_suffix] = { delta_values.irq * multiplier,
-                            "The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " A high number of this metric may indicate hardware misconfiguration or a very high network load."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSSoftIrqTime" + cpu_suffix] = { delta_values.softirq * multiplier,
-                            "The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " A high number of this metric may indicate inefficient software running on the system."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSStealTime" + cpu_suffix] = { delta_values.steal * multiplier,
-                            "The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " Not every virtualized environments present this metric, and most of them don't."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSGuestTime" + cpu_suffix] = { delta_values.guest * multiplier,
-                            "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " This metric is irrelevant for ClickHouse, but still exists for completeness."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
-                        new_values["OSGuestNiceTime" + cpu_suffix] = { delta_values.guest_nice * multiplier,
-                            "The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
-                            " This metric is irrelevant for ClickHouse, but still exists for completeness."
-                            " The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
+                        applyCPUMetricsUpdate(new_values, cpu_suffix, delta_values, multiplier);
                    }

                    prev_values = current_values;
@ -978,38 +1169,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
                Float64 num_cpus_to_normalize = max_cpu_cgroups > 0 ? max_cpu_cgroups : num_cpus;

                if (num_cpus_to_normalize > 0)
-                {
-                    new_values["OSUserTimeNormalized"] = { delta_values_all_cpus.user * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSNiceTimeNormalized"] = { delta_values_all_cpus.nice * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSSystemTimeNormalized"] = { delta_values_all_cpus.system * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSIdleTimeNormalized"] = { delta_values_all_cpus.idle * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSIOWaitTimeNormalized"] = { delta_values_all_cpus.iowait * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSIrqTimeNormalized"] = { delta_values_all_cpus.irq * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSSoftIrqTimeNormalized"] = { delta_values_all_cpus.softirq * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSStealTimeNormalized"] = { delta_values_all_cpus.steal * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSGuestTimeNormalized"] = { delta_values_all_cpus.guest * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                    new_values["OSGuestNiceTimeNormalized"] = { delta_values_all_cpus.guest_nice * multiplier / num_cpus_to_normalize,
-                        "The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
-                        " This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
-                }
+                    applyNormalizedCPUMetricsUpdate(new_values, num_cpus_to_normalize, delta_values_all_cpus, multiplier);
            }

            proc_stat_values_other = current_other_values;
@ -1042,8 +1202,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update)
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }
-
-    if (meminfo)
+    else if (meminfo)
    {
        try
        {
--- a/src/Common/AsynchronousMetrics.h
+++ b/src/Common/AsynchronousMetrics.h
@ -126,6 +126,8 @@ private:
    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_period TSA_GUARDED_BY(data_mutex);
    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex);
    std::optional<ReadBufferFromFilePRead> cgroupcpu_max TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_stat TSA_GUARDED_BY(data_mutex);
+    std::optional<ReadBufferFromFilePRead> cgroupcpuacct_stat TSA_GUARDED_BY(data_mutex);

    std::optional<ReadBufferFromFilePRead> vm_max_map_count TSA_GUARDED_BY(data_mutex);
    std::optional<ReadBufferFromFilePRead> vm_maps TSA_GUARDED_BY(data_mutex);
@ -221,6 +223,16 @@ private:
    void openBlockDevices();
    void openSensorsChips();
    void openEDAC();
+
+    void applyCPUMetricsUpdate(
+        AsynchronousMetricValues & new_values, const std::string & cpu_suffix, const ProcStatValuesCPU & delta_values, double multiplier);
+
+    void applyNormalizedCPUMetricsUpdate(
+        AsynchronousMetricValues & new_values,
+        double num_cpus_to_normalize,
+        const ProcStatValuesCPU & delta_values_all_cpus,
+        double multiplier);
+
 #endif

    void run();
--- a/src/Common/CgroupsMemoryUsageObserver.cpp
+++ b/src/Common/CgroupsMemoryUsageObserver.cpp
@ -144,31 +144,6 @@ private:
 /// - I did not test what happens if a host has v1 and v2 simultaneously enabled. I believe such
 ///   systems existed only for a short transition period.

-std::optional<std::string> getCgroupsV2Path()
-{
-    if (!cgroupsV2Enabled())
-        return {};
-
-    if (!cgroupsV2MemoryControllerEnabled())
-        return {};
-
-    fs::path current_cgroup = cgroupV2PathOfProcess();
-    if (current_cgroup.empty())
-        return {};
-
-    /// Return the bottom-most nested current memory file. If there is no such file at the current
-    /// level, try again at the parent level as memory settings are inherited.
-    while (current_cgroup != default_cgroups_mount.parent_path())
-    {
-        const auto current_path = current_cgroup / "memory.current";
-        const auto stat_path = current_cgroup / "memory.stat";
-        if (fs::exists(current_path) && fs::exists(stat_path))
-            return {current_cgroup};
-        current_cgroup = current_cgroup.parent_path();
-    }
-    return {};
-}
-
 std::optional<std::string> getCgroupsV1Path()
 {
    auto path = default_cgroups_mount / "memory/memory.stat";
@ -179,7 +154,7 @@ std::optional<std::string> getCgroupsV1Path()

 std::pair<std::string, CgroupsMemoryUsageObserver::CgroupsVersion> getCgroupsPath()
 {
-    auto v2_path = getCgroupsV2Path();
+    auto v2_path = getCgroupsV2PathContainingFile("memory.current");
    if (v2_path.has_value())
        return {*v2_path, CgroupsMemoryUsageObserver::CgroupsVersion::V2};

--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -307,7 +307,7 @@
    M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \
    M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
    \
-    M(S3DiskNoKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
+    M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \

 #ifdef APPLY_FOR_EXTERNAL_METRICS
    #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -209,8 +209,35 @@
    \
    M(Merge, "Number of launched background merges.") \
    M(MergedRows, "Rows read for background merges. This is the number of rows before merge.") \
+    M(MergedColumns, "Number of columns merged during the horizontal stage of merges.") \
+    M(GatheredColumns, "Number of columns gathered during the vertical stage of merges.") \
    M(MergedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for background merges. This is the number before merge.") \
-    M(MergesTimeMilliseconds, "Total time spent for background merges.")\
+    M(MergeTotalMilliseconds, "Total time spent for background merges") \
+    M(MergeExecuteMilliseconds, "Total busy time spent for execution of background merges") \
+    M(MergeHorizontalStageTotalMilliseconds, "Total time spent for horizontal stage of background merges") \
+    M(MergeHorizontalStageExecuteMilliseconds, "Total busy time spent for execution of horizontal stage of background merges") \
+    M(MergeVerticalStageTotalMilliseconds, "Total time spent for vertical stage of background merges") \
+    M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges") \
+    M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges") \
+    M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges") \
+    \
+    M(MergingSortedMilliseconds, "Total time spent while merging sorted columns") \
+    M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns") \
+    M(CollapsingSortedMilliseconds, "Total time spent while collapsing sorted columns") \
+    M(ReplacingSortedMilliseconds, "Total time spent while replacing sorted columns") \
+    M(SummingSortedMilliseconds, "Total time spent while summing sorted columns") \
+    M(VersionedCollapsingSortedMilliseconds, "Total time spent while version collapsing sorted columns") \
+    M(GatheringColumnMilliseconds, "Total time spent while gathering columns for vertical merge") \
+    \
+    M(MutationTotalParts, "Number of total parts for which mutations tried to be applied") \
+    M(MutationUntouchedParts, "Number of total parts for which mutations tried to be applied but which was completely skipped according to predicate") \
+    M(MutatedRows, "Rows read for mutations. This is the number of rows before mutation") \
+    M(MutatedUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) that was read for mutations. This is the number before mutation.") \
+    M(MutationTotalMilliseconds, "Total time spent for mutations.") \
+    M(MutationExecuteMilliseconds, "Total busy time spent for execution of mutations.") \
+    M(MutationAllPartColumns, "Number of times when task to mutate all columns in part was created") \
+    M(MutationSomePartColumns, "Number of times when task to mutate some columns in part was created") \
+    M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections in mutations.") \
    \
    M(MergeTreeDataWriterRows, "Number of rows INSERTed to MergeTree tables.") \
    M(MergeTreeDataWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables.") \
@ -225,7 +252,6 @@
    M(MergeTreeDataWriterProjectionsCalculationMicroseconds, "Time spent calculating projections") \
    M(MergeTreeDataProjectionWriterSortingBlocksMicroseconds, "Time spent sorting blocks (for projection it might be a key different from table's sorting key)") \
    M(MergeTreeDataProjectionWriterMergingBlocksMicroseconds, "Time spent merging blocks") \
-    M(MutateTaskProjectionsCalculationMicroseconds, "Time spent calculating projections") \
    \
    M(InsertedWideParts, "Number of parts inserted in Wide format.") \
    M(InsertedCompactParts, "Number of parts inserted in Compact format.") \
--- a/src/Core/ProtocolDefines.h
+++ b/src/Core/ProtocolDefines.h
@ -83,6 +83,9 @@ static constexpr auto DBMS_MIN_REVISION_WITH_SYSTEM_KEYWORDS_TABLE = 54468;

 static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;

+/// Packets size header
+static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
+
 /// Version of ClickHouse TCP protocol.
 ///
 /// Should be incremented manually on protocol changes.
@ -90,6 +93,6 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;
 /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
 /// later is just a number for server version (one number instead of commit SHA)
 /// for simplicity (sometimes it may be more convenient in some use cases).
-static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54469;
+static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470;

 }
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -593,7 +593,6 @@ class IColumn;
    M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
    M(Bool, enable_lightweight_delete, true, "Enable lightweight DELETE mutations for mergetree tables.", 0) ALIAS(allow_experimental_lightweight_delete) \
    M(UInt64, lightweight_deletes_sync, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes", 0) \
-    M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete.", 0) \
    M(Bool, apply_deleted_mask, true, "Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and \"undelete\" scenarios", 0) \
    M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
    M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@ -103,7 +103,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
            {"dictionary_validate_primary_key_type", false, false, "Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64."},
            {"collect_hash_table_stats_during_joins", false, true, "New setting."},
            {"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."},
-            {"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."},            {"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
+            {"input_format_orc_reader_time_zone_name", "GMT", "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT."},
            {"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
            {"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},
            {"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -175,7 +175,8 @@ IMPLEMENT_SETTING_ENUM(ParallelReplicasCustomKeyFilterType, ErrorCodes::BAD_ARGU

 IMPLEMENT_SETTING_ENUM(LightweightMutationProjectionMode, ErrorCodes::BAD_ARGUMENTS,
    {{"throw", LightweightMutationProjectionMode::THROW},
-     {"drop", LightweightMutationProjectionMode::DROP}})
+     {"drop", LightweightMutationProjectionMode::DROP},
+     {"rebuild", LightweightMutationProjectionMode::REBUILD}})

 IMPLEMENT_SETTING_ENUM(DeduplicateMergeProjectionMode, ErrorCodes::BAD_ARGUMENTS,
    {{"throw", DeduplicateMergeProjectionMode::THROW},
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -311,6 +311,7 @@ enum class LightweightMutationProjectionMode : uint8_t
 {
    THROW,
    DROP,
+    REBUILD,
 };

 DECLARE_SETTING_ENUM(LightweightMutationProjectionMode)
--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp
@ -196,7 +196,7 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
            }
            else
            {
-                std::tuple<std::string, std::string, std::string, uint16_t, std::string, std::string, std::string> row;
+                std::tuple<std::string, std::string, std::string, uint16_t, std::string, std::string, std::string, std::string> row;
                while (stream >> row)
                {
                    const auto column_name = std::get<0>(row);
@ -206,13 +206,14 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList(
                        std::get<3>(row));

                    columns.push_back(NameAndTypePair(column_name, data_type));
-                    auto attgenerated = std::get<6>(row);
+                    auto attgenerated = std::get<7>(row);

                    attributes.emplace(
                        column_name,
                        PostgreSQLTableStructure::PGAttribute{
                            .atttypid = parse<int>(std::get<4>(row)),
                            .atttypmod = parse<int>(std::get<5>(row)),
+                            .attnum = parse<int>(std::get<6>(row)),
                            .atthasdef = false,
                            .attgenerated = attgenerated.empty() ? char{} : char(attgenerated[0]),
                            .attr_def = {}
@ -308,6 +309,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
           "attndims AS dims, " /// array dimensions
           "atttypid as type_id, "
           "atttypmod as type_modifier, "
+           "attnum as att_num, "
           "attgenerated as generated " /// if column has GENERATED
           "FROM pg_attribute "
           "WHERE attrelid = (SELECT oid FROM pg_class WHERE {}) "
@ -338,17 +340,29 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
            "WHERE adrelid = (SELECT oid FROM pg_class WHERE {});", where);

        pqxx::result result{tx.exec(attrdef_query)};
-        for (const auto row : result)
+        if (static_cast<uint64_t>(result.size()) > table.physical_columns->names.size())
        {
-            size_t adnum = row[0].as<int>();
-            if (!adnum || adnum > table.physical_columns->names.size())
+            throw Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Received {} attrdef, but currently fetched columns list has {} columns",
+                            result.size(), table.physical_columns->attributes.size());
+        }
+
+        for (const auto & column_attrs : table.physical_columns->attributes)
+        {
+            if (column_attrs.second.attgenerated != 's') /// e.g. not a generated column
            {
-                throw Exception(ErrorCodes::LOGICAL_ERROR,
-                                "Received adnum {}, but currently fetched columns list has {} columns",
-                                adnum, table.physical_columns->attributes.size());
+                continue;
+            }
+
+            for (const auto row : result)
+            {
+                int adnum = row[0].as<int>();
+                if (column_attrs.second.attnum == adnum)
+                {
+                    table.physical_columns->attributes.at(column_attrs.first).attr_def = row[1].as<std::string>();
+                    break;
+                }
            }
-            const auto column_name = table.physical_columns->names[adnum - 1];
-            table.physical_columns->attributes.at(column_name).attr_def = row[1].as<std::string>();
        }
    }

--- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
+++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.h
@ -16,6 +16,7 @@ struct PostgreSQLTableStructure
    {
        Int32 atttypid;
        Int32 atttypmod;
+        Int32 attnum;
        bool atthasdef;
        char attgenerated;
        std::string attr_def;
--- a/src/Dictionaries/ClickHouseDictionarySource.cpp
+++ b/src/Dictionaries/ClickHouseDictionarySource.cpp
@ -51,6 +51,8 @@ namespace
            configuration.db,
            configuration.user,
            configuration.password,
+            configuration.proto_send_chunked,
+            configuration.proto_recv_chunked,
            configuration.quota_key,
            "", /* cluster */
            "", /* cluster_secret */
@ -222,7 +224,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
        {
            validateNamedCollection(
                *named_collection, {}, ValidateKeysMultiset<ExternalDatabaseEqualKeysSet>{
-                    "secure", "host", "hostname", "port", "user", "username", "password", "quota_key", "name",
+                    "secure", "host", "hostname", "port", "user", "username", "password", "proto_send_chunked", "proto_recv_chunked", "quota_key", "name",
                    "db", "database", "table","query", "where", "invalidate_query", "update_field", "update_lag"});

            const auto secure = named_collection->getOrDefault("secure", false);
@ -234,6 +236,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
                .host = host,
                .user = named_collection->getAnyOrDefault<String>({"user", "username"}, "default"),
                .password = named_collection->getOrDefault<String>("password", ""),
+                .proto_send_chunked = named_collection->getOrDefault<String>("proto_send_chunked", "notchunked"),
+                .proto_recv_chunked = named_collection->getOrDefault<String>("proto_recv_chunked", "notchunked"),
                .quota_key = named_collection->getOrDefault<String>("quota_key", ""),
                .db = named_collection->getAnyOrDefault<String>({"db", "database"}, default_database),
                .table = named_collection->getOrDefault<String>("table", ""),
@ -258,6 +262,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
                .host = host,
                .user = config.getString(settings_config_prefix + ".user", "default"),
                .password = config.getString(settings_config_prefix + ".password", ""),
+                .proto_send_chunked = config.getString(settings_config_prefix + ".proto_caps.send", "notchunked"),
+                .proto_recv_chunked = config.getString(settings_config_prefix + ".proto_caps.recv", "notchunked"),
                .quota_key = config.getString(settings_config_prefix + ".quota_key", ""),
                .db = config.getString(settings_config_prefix + ".db", default_database),
                .table = config.getString(settings_config_prefix + ".table", ""),
--- a/src/Dictionaries/ClickHouseDictionarySource.h
+++ b/src/Dictionaries/ClickHouseDictionarySource.h
@ -23,6 +23,8 @@ public:
        const std::string host;
        const std::string user;
        const std::string password;
+        const std::string proto_send_chunked;
+        const std::string proto_recv_chunked;
        const std::string quota_key;
        const std::string db;
        const std::string table;
--- a/src/IO/NetUtils.h
+++ b/src/IO/NetUtils.h
@ -0,0 +1,58 @@
+#pragma once
+
+#include <concepts>
+#include <bit>
+
+
+namespace DB
+{
+
+template<std::integral T>
+constexpr T netToHost(T value) noexcept
+{
+    if constexpr (std::endian::native != std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+template<std::integral T>
+constexpr T hostToNet(T value) noexcept
+{
+    if constexpr (std::endian::native != std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+template<std::integral T>
+constexpr T toLittleEndian(T value) noexcept
+{
+    if constexpr (std::endian::native == std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+template<std::integral T>
+constexpr T toBigEndian(T value) noexcept
+{
+    if constexpr (std::endian::native != std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+template<std::integral T>
+constexpr T fromLittleEndian(T value) noexcept
+{
+    if constexpr (std::endian::native == std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+template<std::integral T>
+constexpr T fromBigEndian(T value) noexcept
+{
+    if constexpr (std::endian::native != std::endian::big)
+        return std::byteswap(value);
+    return value;
+}
+
+}
--- a/src/IO/ReadBufferFromPocoSocket.cpp
+++ b/src/IO/ReadBufferFromPocoSocket.cpp
@ -32,7 +32,7 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
 }

-bool ReadBufferFromPocoSocket::nextImpl()
+ssize_t ReadBufferFromPocoSocketBase::socketReceiveBytesImpl(char * ptr, size_t size)
 {
    ssize_t bytes_read = 0;
    Stopwatch watch;
@ -43,14 +43,11 @@ bool ReadBufferFromPocoSocket::nextImpl()
        ProfileEvents::increment(ProfileEvents::NetworkReceiveBytes, bytes_read);
    });

+    CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive);
+
    /// Add more details to exceptions.
    try
    {
-        CurrentMetrics::Increment metric_increment(CurrentMetrics::NetworkReceive);
-
-        if (internal_buffer.size() > INT_MAX)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow");
-
        /// If async_callback is specified, set socket to non-blocking mode
        /// and try to read data from it, if socket is not ready for reading,
        /// run async_callback and try again later.
@ -61,7 +58,7 @@ bool ReadBufferFromPocoSocket::nextImpl()
            socket.setBlocking(false);
            SCOPE_EXIT(socket.setBlocking(true));
            bool secure = socket.secure();
-            bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
+            bytes_read = socket.impl()->receiveBytes(ptr, static_cast<int>(size));

            /// Check EAGAIN and ERR_SSL_WANT_READ/ERR_SSL_WANT_WRITE for secure socket (reading from secure socket can write too).
            while (bytes_read < 0 && (errno == EAGAIN || (secure && (checkSSLWantRead(bytes_read) || checkSSLWantWrite(bytes_read)))))
@ -73,12 +70,12 @@ bool ReadBufferFromPocoSocket::nextImpl()
                    async_callback(socket.impl()->sockfd(), socket.getReceiveTimeout(), AsyncEventTimeoutType::RECEIVE, socket_description, AsyncTaskExecutor::Event::READ | AsyncTaskExecutor::Event::ERROR);

                /// Try to read again.
-                bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
+                bytes_read = socket.impl()->receiveBytes(ptr, static_cast<int>(size));
            }
        }
        else
        {
-            bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), static_cast<int>(internal_buffer.size()));
+            bytes_read = socket.impl()->receiveBytes(ptr, static_cast<int>(size));
        }
    }
    catch (const Poco::Net::NetException & e)
@ -99,6 +96,16 @@ bool ReadBufferFromPocoSocket::nextImpl()
    if (bytes_read < 0)
        throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket (peer: {}, local: {})", peer_address.toString(), socket.address().toString());

+    return bytes_read;
+}
+
+bool ReadBufferFromPocoSocketBase::nextImpl()
+{
+    if (internal_buffer.size() > INT_MAX)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer overflow");
+
+    ssize_t bytes_read = socketReceiveBytesImpl(internal_buffer.begin(), internal_buffer.size());
+
    if (read_event != ProfileEvents::end())
        ProfileEvents::increment(read_event, bytes_read);

@ -110,7 +117,7 @@ bool ReadBufferFromPocoSocket::nextImpl()
    return true;
 }

-ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size)
+ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size)
    : BufferWithOwnMemory<ReadBuffer>(buf_size)
    , socket(socket_)
    , peer_address(socket.peerAddress())
@ -119,19 +126,22 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_,
 {
 }

-ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size)
-    : ReadBufferFromPocoSocket(socket_, buf_size)
+ReadBufferFromPocoSocketBase::ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size)
+    : ReadBufferFromPocoSocketBase(socket_, buf_size)
 {
    read_event = read_event_;
 }

-bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const
+bool ReadBufferFromPocoSocketBase::poll(size_t timeout_microseconds) const
 {
-    if (available())
+    /// For secure socket it is important to check if any remaining data available in underlying decryption buffer -
+    /// read always retrieves the whole encrypted frame from the wire and puts it into underlying buffer while returning only requested size -
+    /// further poll() can block though there is still data to read in the underlying decryption buffer.
+    if (available() || socket.impl()->available())
        return true;

    Stopwatch watch;
-    bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
+    bool res = socket.impl()->poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR);
    ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds());
    return res;
 }
--- a/src/IO/ReadBufferFromPocoSocket.h
+++ b/src/IO/ReadBufferFromPocoSocket.h
@ -9,7 +9,7 @@ namespace DB
 {

 /// Works with the ready Poco::Net::Socket. Blocking operations.
-class ReadBufferFromPocoSocket : public BufferWithOwnMemory<ReadBuffer>
+class ReadBufferFromPocoSocketBase : public BufferWithOwnMemory<ReadBuffer>
 {
 protected:
    Poco::Net::Socket & socket;
@ -25,16 +25,29 @@ protected:
    bool nextImpl() override;

 public:
-    explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
-    explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit ReadBufferFromPocoSocketBase(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);

    bool poll(size_t timeout_microseconds) const;

    void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); }

+    ssize_t socketReceiveBytesImpl(char * ptr, size_t size);
+
 private:
    AsyncCallback async_callback;
    std::string socket_description;
 };

+class ReadBufferFromPocoSocket : public ReadBufferFromPocoSocketBase
+{
+public:
+    explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE)
+        : ReadBufferFromPocoSocketBase(socket_, buf_size)
+    {}
+    explicit ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE)
+        : ReadBufferFromPocoSocketBase(socket_, read_event_, buf_size)
+    {}
+};
+
 }
--- a/src/IO/ReadBufferFromPocoSocketChunked.cpp
+++ b/src/IO/ReadBufferFromPocoSocketChunked.cpp
@ -0,0 +1,166 @@
+#include <IO/ReadBufferFromPocoSocketChunked.h>
+#include <Common/logger_useful.h>
+#include <IO/NetUtils.h>
+
+
+namespace DB::ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+namespace DB
+{
+
+ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size)
+    : ReadBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size)
+{}
+
+ReadBufferFromPocoSocketChunked::ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size)
+    : ReadBufferFromPocoSocketBase(
+        socket_, read_event_,
+        std::min(buf_size, static_cast<size_t>(std::numeric_limits<decltype(chunk_left)>::max()))),
+        our_address(socket_.address()), log(getLogger("Protocol"))
+{}
+
+void ReadBufferFromPocoSocketChunked::enableChunked()
+{
+    if (chunked)
+        return;
+    chunked = 1;
+    data_end = buffer().end();
+    /// Resize working buffer so any next read will call nextImpl
+    working_buffer.resize(offset());
+    chunk_left = 0;
+    next_chunk = 0;
+}
+
+bool ReadBufferFromPocoSocketChunked::hasBufferedData() const
+{
+    if (available())
+        return true;
+
+    return chunked && (static_cast<size_t>(data_end - working_buffer.end()) > sizeof(next_chunk));
+}
+
+bool ReadBufferFromPocoSocketChunked::poll(size_t timeout_microseconds) const
+{
+    if (chunked)
+        if (available() || static_cast<size_t>(data_end - working_buffer.end()) > sizeof(next_chunk))
+            return true;
+
+    return ReadBufferFromPocoSocketBase::poll(timeout_microseconds);
+}
+
+
+bool ReadBufferFromPocoSocketChunked::loadNextChunk(Position c_pos, bool cont)
+{
+    auto buffered = std::min(static_cast<size_t>(data_end - c_pos), sizeof(next_chunk));
+
+    if (buffered)
+        std::memcpy(&next_chunk, c_pos, buffered);
+    if (buffered < sizeof(next_chunk))
+        if (socketReceiveBytesImpl(reinterpret_cast<char *>(&next_chunk) + buffered, sizeof(next_chunk) - buffered) < static_cast<ssize_t>(sizeof(next_chunk) - buffered))
+            return false;
+    next_chunk = fromLittleEndian(next_chunk);
+
+    if (next_chunk)
+    {
+        if (cont)
+            LOG_TEST(log, "{} <- {} Chunk receive continued. Size {}", ourAddress().toString(), peerAddress().toString(), next_chunk);
+    }
+    else
+        LOG_TEST(log, "{} <- {} Chunk receive ended.", ourAddress().toString(), peerAddress().toString());
+
+    return true;
+}
+
+bool ReadBufferFromPocoSocketChunked::processChunkLeft(Position c_pos)
+{
+    if (data_end - c_pos < chunk_left)
+    {
+        working_buffer.resize(data_end - buffer().begin());
+        nextimpl_working_buffer_offset = c_pos - buffer().begin();
+        chunk_left -= (data_end - c_pos);
+        return true;
+    }
+
+    nextimpl_working_buffer_offset = c_pos - buffer().begin();
+    working_buffer.resize(nextimpl_working_buffer_offset + chunk_left);
+
+    c_pos += chunk_left;
+
+    if (!loadNextChunk(c_pos, true))
+        return false;
+
+    chunk_left = 0;
+    return true;
+}
+
+
+bool ReadBufferFromPocoSocketChunked::nextImpl()
+{
+    if (!chunked)
+        return ReadBufferFromPocoSocketBase::nextImpl();
+
+    auto * c_pos = pos;
+
+    if (chunk_left == 0)
+    {
+        if (next_chunk == 0)
+        {
+            if (chunked == 1)
+                chunked = 2; // first chunked block - no end marker
+            else
+                c_pos = pos + sizeof(next_chunk); // bypass chunk end marker
+
+            if (c_pos > data_end)
+                c_pos = data_end;
+
+            if (!loadNextChunk(c_pos))
+                return false;
+
+            chunk_left = next_chunk;
+            next_chunk = 0;
+
+            if (chunk_left == 0)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Native protocol: empty chunk received");
+
+            c_pos += sizeof(next_chunk);
+
+            if (c_pos >= data_end)
+            {
+                if (!ReadBufferFromPocoSocketBase::nextImpl())
+                    return false;
+                data_end = buffer().end();
+                c_pos = buffer().begin();
+            }
+
+            LOG_TEST(log, "{} <- {} Chunk receive started. Message {}, size {}", ourAddress().toString(), peerAddress().toString(), static_cast<unsigned int>(*c_pos), chunk_left);
+        }
+        else
+        {
+            c_pos += sizeof(next_chunk);
+            if (c_pos >= data_end)
+            {
+                if (!ReadBufferFromPocoSocketBase::nextImpl())
+                    return false;
+                data_end = buffer().end();
+                c_pos = buffer().begin();
+            }
+
+            chunk_left = next_chunk;
+            next_chunk = 0;
+        }
+    }
+    else
+    {
+        if (!ReadBufferFromPocoSocketBase::nextImpl())
+            return false;
+        data_end = buffer().end();
+        c_pos = buffer().begin();
+    }
+
+    return processChunkLeft(c_pos);
+}
+
+}
--- a/src/IO/ReadBufferFromPocoSocketChunked.h
+++ b/src/IO/ReadBufferFromPocoSocketChunked.h
@ -0,0 +1,109 @@
+#pragma once
+
+#include <IO/ReadBuffer.h>
+#include <IO/ReadBufferFromPocoSocket.h>
+
+/*
+
+Handshake              +=============
+                       | 'Hello' type
+                       |   handshake exchange
+                       |     chunked protocol negotiation
+                       +=============
+
+
+Basic chunk:
+                       +=============
+Chunk begins           | 0x12345678      chunk size, 4 bytes little endian
+                       +-------------
+                       | Packet type     always follows beginning of the chunk
+                       |   packet data
+                       +-------------
+Chunk ends             | 0x00000000      4 zero bytes
+                       +=============
+
+
+Datastream chunk:
+                       +=============
+Chunk begins           | 0x12345678
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+...arbitrary number        .....
+of packets...              .....
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+Chunk ends             | 0x00000000
+                       +=============
+
+
+Multipart chunk:
+                       +=============
+Chunk begins           | 0x12345678      chunk part size, 4 bytes little endian
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+                       | Packet type
+                       |   (partial) packet data
+                       +=============
+Chunk continues        | 0x12345678      chunk next part size, 4 bytes little endian
+                       +=============
+                       |   possibly previous packet's data
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+...arbitrary number        .....
+of chunk parts...          .....
+                       +-------------
+                       | Packet type
+                       |   packet data
+                       +-------------
+Chunk ends             | 0x00000000
+                       +=============
+
+*/
+
+namespace DB
+{
+
+class ReadBufferFromPocoSocketChunked: public ReadBufferFromPocoSocketBase
+{
+public:
+    using ReadBufferFromPocoSocketBase::setAsyncCallback;
+
+    explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit ReadBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & read_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+
+    void enableChunked();
+
+    bool hasBufferedData() const;
+
+    bool poll(size_t timeout_microseconds) const;
+
+    Poco::Net::SocketAddress peerAddress() { return peer_address; }
+    Poco::Net::SocketAddress ourAddress() { return our_address; }
+
+protected:
+    bool loadNextChunk(Position c_pos, bool cont = false);
+    bool processChunkLeft(Position c_pos);
+    bool nextImpl() override;
+
+    Poco::Net::SocketAddress our_address;
+
+private:
+    LoggerPtr log;
+    Position data_end = nullptr; // end position of data in the internal_buffer
+    UInt32 chunk_left = 0;       // chunk left to read from socket
+    UInt32 next_chunk = 0;       // size of the next cnunk
+    UInt8 chunked = 0;           // 0 - disabled; 1 - started; 2 - enabled;
+};
+
+}
--- a/src/IO/S3/Client.cpp
+++ b/src/IO/S3/Client.cpp
@ -46,7 +46,7 @@ namespace ProfileEvents

 namespace CurrentMetrics
 {
-    extern const Metric S3DiskNoKeyErrors;
+    extern const Metric DiskS3NoSuchKeyErrors;
 }

 namespace DB
@ -701,7 +701,7 @@ RequestResult Client::processRequestResult(RequestResult && outcome) const
        return std::forward<RequestResult>(outcome);

    if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY)
-        CurrentMetrics::add(CurrentMetrics::S3DiskNoKeyErrors);
+        CurrentMetrics::add(CurrentMetrics::DiskS3NoSuchKeyErrors);

    String enriched_message = fmt::format(
        "{} {}",
--- a/src/IO/WriteBuffer.h
+++ b/src/IO/WriteBuffer.h
@ -64,7 +64,8 @@ public:
        }

        bytes += bytes_in_buffer;
-        pos = working_buffer.begin();
+        pos = working_buffer.begin() + nextimpl_working_buffer_offset;
+        nextimpl_working_buffer_offset = 0;
    }

    /// Calling finalize() in the destructor of derived classes is a bad practice.
@ -164,6 +165,11 @@ protected:
    bool finalized = false;
    bool canceled = false;

+    /// The number of bytes to preserve from the initial position of `working_buffer`
+    /// buffer. Apparently this is an additional out-parameter for nextImpl(),
+    /// not a real field.
+    size_t nextimpl_working_buffer_offset = 0;
+
 private:
    /** Write the data in the buffer (from the beginning of the buffer to the current position).
      * Throw an exception if something is wrong.
--- a/src/IO/WriteBufferFromPocoSocket.cpp
+++ b/src/IO/WriteBufferFromPocoSocket.cpp
@ -183,6 +183,7 @@ WriteBufferFromPocoSocket::WriteBufferFromPocoSocket(Poco::Net::Socket & socket_
    , socket(socket_)
    , peer_address(socket.peerAddress())
    , our_address(socket.address())
+    , write_event(ProfileEvents::end())
    , socket_description("socket (" + peer_address.toString() + ")")
 {
 }
--- a/src/IO/WriteBufferFromPocoSocketChunked.cpp
+++ b/src/IO/WriteBufferFromPocoSocketChunked.cpp
@ -0,0 +1,210 @@
+#include <IO/WriteBufferFromPocoSocketChunked.h>
+#include <Common/logger_useful.h>
+#include <IO/NetUtils.h>
+
+
+namespace
+{
+
+template <typename T>
+void setValue(T * typed_ptr, std::type_identity_t<T> val)
+{
+    memcpy(static_cast<void*>(typed_ptr), &val, sizeof(T));
+}
+
+}
+
+namespace DB
+{
+
+WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size)
+    : WriteBufferFromPocoSocketChunked(socket_, ProfileEvents::end(), buf_size)
+{}
+
+WriteBufferFromPocoSocketChunked::WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size)
+    : WriteBufferFromPocoSocket(
+        socket_, write_event_,
+        std::clamp(buf_size, sizeof(*chunk_size_ptr) + 1, static_cast<size_t>(std::numeric_limits<std::remove_reference_t<decltype(*chunk_size_ptr)>>::max()))),
+        log(getLogger("Protocol"))
+{}
+
+void WriteBufferFromPocoSocketChunked::enableChunked()
+{
+    chunked = true;
+    /// Initialize next chunk
+    chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(pos);
+    pos += std::min(available(), sizeof(*chunk_size_ptr));
+    /// Pretend finishChunk() was just called to prevent sending empty chunk if finishChunk() called immediately
+    last_finish_chunk = chunk_size_ptr;
+}
+
+void WriteBufferFromPocoSocketChunked::finishChunk()
+{
+    if (!chunked)
+        return;
+
+    if (pos <= reinterpret_cast<Position>(chunk_size_ptr) + sizeof(*chunk_size_ptr))
+    {
+        /// Prevent duplicate finish chunk (and finish chunk right after enableChunked())
+        if (chunk_size_ptr == last_finish_chunk)
+            return;
+
+        /// If current chunk is empty it means we are finishing a chunk previously sent by next(),
+        /// we want to convert current chunk header into end-of-chunk marker and initialize next chunk.
+        /// We don't need to worry about if it's the end of the buffer because next() always sends the whole buffer
+        /// so it should be a beginning of the buffer.
+
+        chassert(reinterpret_cast<Position>(chunk_size_ptr) == working_buffer.begin());
+
+        setValue(chunk_size_ptr, 0);
+        /// Initialize next chunk
+        chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(pos);
+        pos += std::min(available(), sizeof(*chunk_size_ptr));
+
+        last_finish_chunk = chunk_size_ptr;
+
+        return;
+    }
+
+    /// Previously finished chunk wasn't sent yet
+    if (last_finish_chunk == chunk_size_ptr)
+    {
+        chunk_started = false;
+        LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString());
+    }
+
+    /// Fill up current chunk size
+    setValue(chunk_size_ptr, toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<Position>(chunk_size_ptr) - sizeof(*chunk_size_ptr))));
+
+    if (!chunk_started)
+        LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}",
+                ourAddress().toString(), peerAddress().toString(),
+                static_cast<unsigned int>(*(reinterpret_cast<char *>(chunk_size_ptr) + sizeof(*chunk_size_ptr))),
+                *chunk_size_ptr);
+    else
+    {
+        chunk_started = false;
+        LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr);
+    }
+
+    LOG_TEST(log, "{} -> {} Chunk send ended.", ourAddress().toString(), peerAddress().toString());
+
+    if (available() < sizeof(*chunk_size_ptr))
+    {
+        finishing = available();
+        pos += available();
+        chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(pos);
+        last_finish_chunk = chunk_size_ptr;
+        return;
+    }
+
+    /// Buffer end-of-chunk
+    setValue(reinterpret_cast<decltype(chunk_size_ptr)>(pos), 0);
+    pos += sizeof(*chunk_size_ptr);
+    /// Initialize next chunk
+    chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(pos);
+    pos += std::min(available(), sizeof(*chunk_size_ptr));
+
+    last_finish_chunk = chunk_size_ptr;
+}
+
+WriteBufferFromPocoSocketChunked::~WriteBufferFromPocoSocketChunked()
+{
+    try
+    {
+        finalize();
+    }
+    catch (...)
+    {
+        tryLogCurrentException(__PRETTY_FUNCTION__);
+    }
+}
+
+void WriteBufferFromPocoSocketChunked::nextImpl()
+{
+    if (!chunked)
+    {
+        WriteBufferFromPocoSocket::nextImpl();
+        return;
+    }
+
+    /// next() after finishChunk at the end of the buffer
+    if (finishing < sizeof(*chunk_size_ptr))
+    {
+        pos -= finishing;
+        /// Send current chunk
+        WriteBufferFromPocoSocket::nextImpl();
+        /// Send end-of-chunk directly
+        UInt32 s = 0;
+        socketSendBytes(reinterpret_cast<const char *>(&s), sizeof(s));
+
+        finishing = sizeof(*chunk_size_ptr);
+
+        /// Initialize next chunk
+        chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(working_buffer.begin());
+        nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr);
+
+        last_finish_chunk = chunk_size_ptr;
+
+        return;
+    }
+
+    /// Prevent sending empty chunk
+    if (offset() == sizeof(*chunk_size_ptr))
+    {
+        nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr);
+        return;
+    }
+
+    /// Finish chunk at the end of the buffer
+    if (working_buffer.end() - reinterpret_cast<Position>(chunk_size_ptr) <= static_cast<std::ptrdiff_t>(sizeof(*chunk_size_ptr)))
+    {
+        pos = reinterpret_cast<Position>(chunk_size_ptr);
+        /// Send current chunk
+        WriteBufferFromPocoSocket::nextImpl();
+        /// Initialize next chunk
+        chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(working_buffer.begin());
+        nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr);
+
+        last_finish_chunk = nullptr;
+
+        return;
+    }
+
+    bool initialize_last_finish_chunk = false;
+    if (pos - reinterpret_cast<Position>(chunk_size_ptr) == sizeof(*chunk_size_ptr)) // next() after finishChunk
+    {
+        pos -= sizeof(*chunk_size_ptr);
+        initialize_last_finish_chunk = true;
+    }
+    else // fill up current chunk size
+    {
+        setValue(chunk_size_ptr, toLittleEndian(static_cast<UInt32>(pos - reinterpret_cast<Position>(chunk_size_ptr) - sizeof(*chunk_size_ptr))));
+        if (!chunk_started)
+        {
+            chunk_started = true;
+            LOG_TEST(log, "{} -> {} Chunk send started. Message {}, size {}",
+                    ourAddress().toString(), peerAddress().toString(),
+                    static_cast<unsigned int>(*(reinterpret_cast<char *>(chunk_size_ptr) + sizeof(*chunk_size_ptr))),
+                    *chunk_size_ptr);
+        }
+        else
+            LOG_TEST(log, "{} -> {} Chunk send continued. Size {}", ourAddress().toString(), peerAddress().toString(), *chunk_size_ptr);
+    }
+    /// Send current chunk
+    WriteBufferFromPocoSocket::nextImpl();
+    /// Initialize next chunk
+    chunk_size_ptr = reinterpret_cast<decltype(chunk_size_ptr)>(working_buffer.begin());
+    nextimpl_working_buffer_offset = sizeof(*chunk_size_ptr);
+
+    last_finish_chunk = initialize_last_finish_chunk ? chunk_size_ptr : nullptr;
+}
+
+void WriteBufferFromPocoSocketChunked::finalizeImpl()
+{
+    if (chunked && offset() == sizeof(*chunk_size_ptr))
+        pos -= sizeof(*chunk_size_ptr);
+    WriteBufferFromPocoSocket::finalizeImpl();
+}
+
+}
--- a/src/IO/WriteBufferFromPocoSocketChunked.h
+++ b/src/IO/WriteBufferFromPocoSocketChunked.h
@ -0,0 +1,36 @@
+#pragma once
+
+#include <Common/logger_useful.h>
+#include <IO/WriteBufferFromPocoSocket.h>
+#include <algorithm>
+
+
+namespace DB
+{
+
+class WriteBufferFromPocoSocketChunked: public WriteBufferFromPocoSocket
+{
+public:
+    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+    explicit WriteBufferFromPocoSocketChunked(Poco::Net::Socket & socket_, const ProfileEvents::Event & write_event_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE);
+
+    void enableChunked();
+    void finishChunk();
+    ~WriteBufferFromPocoSocketChunked() override;
+
+protected:
+    void nextImpl() override;
+    void finalizeImpl() override;
+    Poco::Net::SocketAddress peerAddress() const { return peer_address; }
+    Poco::Net::SocketAddress ourAddress() const { return our_address; }
+
+private:
+    LoggerPtr log;
+    bool chunked = false;
+    UInt32 * last_finish_chunk = nullptr;       // pointer to the last chunk header created by finishChunk
+    bool chunk_started = false;                 // chunk started flag
+    UInt32 * chunk_size_ptr = nullptr;          // pointer to the chunk size holder in the buffer
+    size_t finishing = sizeof(*chunk_size_ptr); // indicates not enough buffer for end-of-chunk marker
+};
+
+}
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@ -113,6 +113,9 @@ Cluster::Address::Address(
    secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
    priority = Priority{config.getInt(config_prefix + ".priority", 1)};

+    proto_send_chunked = config.getString(config_prefix + ".proto_caps.send", "notchunked");
+    proto_recv_chunked = config.getString(config_prefix + ".proto_caps.recv", "notchunked");
+
    const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
    auto default_port = config.getInt(port_type, 0);

@ -425,7 +428,9 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
            auto pool = ConnectionPoolFactory::instance().get(
                static_cast<unsigned>(settings.distributed_connections_pool_size),
                address.host_name, address.port,
-                address.default_database, address.user, address.password, address.quota_key,
+                address.default_database, address.user, address.password,
+                address.proto_send_chunked, address.proto_recv_chunked,
+                address.quota_key,
                address.cluster, address.cluster_secret,
                "server", address.compression,
                address.secure, address.priority);
@ -589,6 +594,8 @@ void Cluster::addShard(
            replica.default_database,
            replica.user,
            replica.password,
+            replica.proto_send_chunked,
+            replica.proto_recv_chunked,
            replica.quota_key,
            replica.cluster,
            replica.cluster_secret,
@ -744,6 +751,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
                    address.default_database,
                    address.user,
                    address.password,
+                    address.proto_send_chunked,
+                    address.proto_recv_chunked,
                    address.quota_key,
                    address.cluster,
                    address.cluster_secret,
--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@ -114,6 +114,8 @@ public:
        UInt16 port{0};
        String user;
        String password;
+        String proto_send_chunked = "notchunked";
+        String proto_recv_chunked = "notchunked";
        String quota_key;

        /// For inter-server authorization
--- a/src/Interpreters/InterpreterDeleteQuery.cpp
+++ b/src/Interpreters/InterpreterDeleteQuery.cpp
@ -17,6 +17,7 @@
 #include <Storages/AlterCommands.h>
 #include <Storages/IStorage.h>
 #include <Storages/MutationCommands.h>
+#include <Storages/MergeTree/MergeTreeSettings.h>


 namespace DB
@ -27,7 +28,6 @@ namespace ErrorCodes
    extern const int TABLE_IS_READ_ONLY;
    extern const int SUPPORT_IS_DISABLED;
    extern const int BAD_ARGUMENTS;
-    extern const int NOT_IMPLEMENTED;
    extern const int QUERY_IS_PROHIBITED;
 }

@ -67,13 +67,42 @@ BlockIO InterpreterDeleteQuery::execute()
    auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
    auto metadata_snapshot = table->getInMemoryMetadataPtr();

-    auto lightweightDelete = [&]()
+    if (table->supportsDelete())
+    {
+        /// Convert to MutationCommand
+        MutationCommands mutation_commands;
+        MutationCommand mut_command;
+
+        mut_command.type = MutationCommand::Type::DELETE;
+        mut_command.predicate = delete_query.predicate;
+
+        mutation_commands.emplace_back(mut_command);
+
+        table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
+        MutationsInterpreter::Settings settings(false);
+        MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate();
+        table->mutate(mutation_commands, getContext());
+        return {};
+    }
+    else if (table->supportsLightweightDelete())
    {
        if (!getContext()->getSettingsRef().enable_lightweight_delete)
            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
                            "Lightweight delete mutate is disabled. "
                            "Set `enable_lightweight_delete` setting to enable it");

+        if (metadata_snapshot->hasProjections())
+        {
+            if (const auto * merge_tree_data = dynamic_cast<const MergeTreeData *>(table.get()))
+                if (merge_tree_data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW)
+                    throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
+                        "DELETE query is not allowed for table {} because as it has projections and setting "
+                        "lightweight_mutation_projection_mode is set to THROW. "
+                        "User should change lightweight_mutation_projection_mode OR "
+                        "drop all the projections manually before running the query",
+                        table_id.getFullTableName());
+        }
+
        /// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query
        String alter_query =
            "ALTER TABLE " + table->getStorageID().getFullTableName()
@ -94,79 +123,9 @@ BlockIO InterpreterDeleteQuery::execute()
        context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync));
        InterpreterAlterQuery alter_interpreter(alter_ast, context);
        return alter_interpreter.execute();
-    };
-
-    if (table->supportsDelete())
-    {
-        /// Convert to MutationCommand
-        MutationCommands mutation_commands;
-        MutationCommand mut_command;
-
-        mut_command.type = MutationCommand::Type::DELETE;
-        mut_command.predicate = delete_query.predicate;
-
-        mutation_commands.emplace_back(mut_command);
-
-        table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
-        MutationsInterpreter::Settings settings(false);
-        MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), settings).validate();
-        table->mutate(mutation_commands, getContext());
-        return {};
-    }
-    else if (table->supportsLightweightDelete())
-    {
-        return lightweightDelete();
    }
    else
    {
-        if (table->hasProjection())
-        {
-            auto context = Context::createCopy(getContext());
-            auto mode = context->getSettingsRef().lightweight_mutation_projection_mode;
-            if (mode == LightweightMutationProjectionMode::THROW)
-            {
-                throw Exception(ErrorCodes::NOT_IMPLEMENTED,
-                    "DELETE query is not supported for table {} as it has projections. "
-                    "User should drop all the projections manually before running the query",
-                    table->getStorageID().getFullTableName());
-            }
-            else if (mode == LightweightMutationProjectionMode::DROP)
-            {
-                std::vector<String> all_projections = metadata_snapshot->projections.getAllRegisteredNames();
-
-                context->setSetting("mutations_sync", Field(context->getSettingsRef().lightweight_deletes_sync));
-
-                /// Drop projections first so that lightweight delete can be performed.
-                for (const auto & projection : all_projections)
-                {
-                    String alter_query =
-                        "ALTER TABLE " + table->getStorageID().getFullTableName()
-                        + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster))
-                        + " DROP PROJECTION IF EXISTS " + projection;
-
-                    ParserAlterQuery parser;
-                    ASTPtr alter_ast = parseQuery(
-                        parser,
-                        alter_query.data(),
-                        alter_query.data() + alter_query.size(),
-                        "ALTER query",
-                        0,
-                        DBMS_DEFAULT_MAX_PARSER_DEPTH,
-                        DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
-
-                    InterpreterAlterQuery alter_interpreter(alter_ast, context);
-                    alter_interpreter.execute();
-                }
-            }
-            else
-            {
-                throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                    "Unrecognized lightweight_mutation_projection_mode, only throw and drop are allowed.");
-            }
-
-            return lightweightDelete();
-        }
-
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
            "DELETE query is not supported for table {}",
            table->getStorageID().getFullTableName());
--- a/src/Processors/Merges/AggregatingSortedTransform.h
+++ b/src/Processors/Merges/AggregatingSortedTransform.h
@ -3,6 +3,11 @@
 #include <Processors/Merges/IMergingTransform.h>
 #include <Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h>

+namespace ProfileEvents
+{
+    extern const Event AggregatingSortedMilliseconds;
+}
+
 namespace DB
 {

@ -29,6 +34,11 @@ public:
    }

    String getName() const override { return "AggregatingSortedTransform"; }
+
+    void onFinish() override
+    {
+        logMergedStats(ProfileEvents::AggregatingSortedMilliseconds, "Aggregated sorted", getLogger("AggregatingSortedTransform"));
+    }
 };

 }
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
@ -30,6 +30,8 @@ public:
    void consume(Input & input, size_t source_num) override;
    Status merge() override;

+    MergedStats getMergedStats() const override { return merged_data.getMergedStats(); }
+
    /// Stores information for aggregation of SimpleAggregateFunction columns
    struct SimpleAggregateDescription
    {
--- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp
@ -126,6 +126,9 @@ IMergingAlgorithm::Status FinishAggregatingInOrderAlgorithm::merge()

 Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge()
 {
+    total_merged_rows += accumulated_rows;
+    total_merged_bytes += accumulated_bytes;
+
    accumulated_rows = 0;
    accumulated_bytes = 0;

--- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h
@ -50,6 +50,8 @@ public:
    void consume(Input & input, size_t source_num) override;
    Status merge() override;

+    MergedStats getMergedStats() const override { return {.bytes = accumulated_bytes, .rows = accumulated_rows, .blocks = chunk_num}; }
+
 private:
    Chunk prepareToMerge();
    void addToAggregation();
@ -92,6 +94,9 @@ private:
    UInt64 chunk_num = 0;
    size_t accumulated_rows = 0;
    size_t accumulated_bytes = 0;
+
+    size_t total_merged_rows = 0;
+    size_t total_merged_bytes = 0;
 };

 }
--- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h
@ -33,6 +33,8 @@ public:
    const char * getName() const override { return "GraphiteRollupSortedAlgorithm"; }
    Status merge() override;

+    MergedStats getMergedStats() const override { return merged_data->getMergedStats(); }
+
    struct ColumnsDefinition
    {
        size_t path_column_num;
--- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h
@ -1,7 +1,7 @@
 #pragma once

 #include <Processors/Chunk.h>
-#include <variant>
+#include <Common/ProfileEvents.h>

 namespace DB
 {
@ -65,6 +65,15 @@ public:

    IMergingAlgorithm() = default;
    virtual ~IMergingAlgorithm() = default;
+
+    struct MergedStats
+    {
+        UInt64 bytes = 0;
+        UInt64 rows = 0;
+        UInt64 blocks = 0;
+    };
+
+    virtual MergedStats getMergedStats() const = 0;
 };

 // TODO: use when compile with clang which could support it
--- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h
+++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h
@ -16,6 +16,8 @@ public:
    void initialize(Inputs inputs) override;
    void consume(Input & input, size_t source_num) override;

+    MergedStats getMergedStats() const override { return merged_data->getMergedStats(); }
+
 private:
    Block header;
    SortDescription description;
--- a/src/Processors/Merges/Algorithms/MergedData.h
+++ b/src/Processors/Merges/Algorithms/MergedData.h
@ -183,6 +183,8 @@ public:
    UInt64 totalAllocatedBytes() const { return total_allocated_bytes; }
    UInt64 maxBlockSize() const { return max_block_size; }

+    IMergingAlgorithm::MergedStats getMergedStats() const { return {.bytes = total_allocated_bytes, .rows = total_merged_rows, .blocks = total_chunks}; }
+
    virtual ~MergedData() = default;

 protected:
--- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h
@ -31,7 +31,7 @@ public:
    void consume(Input & input, size_t source_num) override;
    Status merge() override;

-    const MergedData & getMergedData() const { return merged_data; }
+    MergedStats getMergedStats() const override { return merged_data.getMergedStats(); }

 private:
    Block header;
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h
@ -30,6 +30,8 @@ public:
    void consume(Input & input, size_t source_num) override;
    Status merge() override;

+    MergedStats getMergedStats() const override { return merged_data.getMergedStats(); }
+
    struct AggregateDescription;
    struct MapDescription;

--- a/src/Processors/Merges/CollapsingSortedTransform.h
+++ b/src/Processors/Merges/CollapsingSortedTransform.h
@ -3,6 +3,11 @@
 #include <Processors/Merges/IMergingTransform.h>
 #include <Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h>

+namespace ProfileEvents
+{
+    extern const Event CollapsingSortedMilliseconds;
+}
+
 namespace DB
 {

@ -36,6 +41,11 @@ public:
    }

    String getName() const override { return "CollapsingSortedTransform"; }
+
+    void onFinish() override
+    {
+        logMergedStats(ProfileEvents::CollapsingSortedMilliseconds, "Collapsed sorted", getLogger("CollapsingSortedTransform"));
+    }
 };

 }
--- a/src/Processors/Merges/IMergingTransform.h
+++ b/src/Processors/Merges/IMergingTransform.h
@ -2,7 +2,10 @@

 #include <Processors/Merges/Algorithms/IMergingAlgorithm.h>
 #include <Processors/IProcessor.h>
+#include <Common/ProfileEvents.h>
 #include <Common/Stopwatch.h>
+#include <Common/logger_useful.h>
+#include <Common/formatReadable.h>

 namespace DB
 {
@ -110,6 +113,8 @@ public:

    void work() override
    {
+        Stopwatch watch{CLOCK_MONOTONIC_COARSE};
+
        if (!state.init_chunks.empty())
            algorithm.initialize(std::move(state.init_chunks));

@ -147,6 +152,8 @@ public:
            // std::cerr << "Finished" << std::endl;
            state.is_finished = true;
        }
+
+        merging_elapsed_ns += watch.elapsedNanoseconds();
    }

 protected:
@ -156,7 +163,33 @@ protected:
    Algorithm algorithm;

    /// Profile info.
-    Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE};
+    UInt64 merging_elapsed_ns = 0;
+
+    void logMergedStats(ProfileEvents::Event elapsed_ms_event, std::string_view transform_message, LoggerPtr log) const
+    {
+        auto stats = algorithm.getMergedStats();
+
+        UInt64 elapsed_ms = merging_elapsed_ns / 1000000LL;
+        ProfileEvents::increment(elapsed_ms_event, elapsed_ms);
+
+        /// Don't print info for small parts (< 1M rows)
+        if (stats.rows < 1000000)
+            return;
+
+        double seconds = static_cast<double>(merging_elapsed_ns) / 1000000000ULL;
+
+        if (seconds == 0.0)
+        {
+            LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in 0 sec.",
+                transform_message, stats.blocks, stats.rows, stats.bytes);
+        }
+        else
+        {
+            LOG_DEBUG(log, "{}, {} blocks, {} rows, {} bytes in {} sec., {} rows/sec., {}/sec.",
+                transform_message, stats.blocks, stats.rows, stats.bytes,
+                seconds, stats.rows / seconds, ReadableSize(stats.bytes / seconds));
+        }
+    }

 private:
    using IMergingTransformBase::state;
--- a/src/Processors/Merges/MergingSortedTransform.cpp
+++ b/src/Processors/Merges/MergingSortedTransform.cpp
@ -1,9 +1,12 @@
 #include <Processors/Merges/MergingSortedTransform.h>
 #include <Processors/Transforms/ColumnGathererTransform.h>
 #include <IO/WriteBuffer.h>
-
 #include <Common/logger_useful.h>
-#include <Common/formatReadable.h>
+
+namespace ProfileEvents
+{
+    extern const Event MergingSortedMilliseconds;
+}

 namespace DB
 {
@ -18,7 +21,6 @@ MergingSortedTransform::MergingSortedTransform(
    UInt64 limit_,
    bool always_read_till_end_,
    WriteBuffer * out_row_sources_buf_,
-    bool quiet_,
    bool use_average_block_sizes,
    bool have_all_inputs_)
    : IMergingTransform(
@ -37,7 +39,6 @@ MergingSortedTransform::MergingSortedTransform(
        limit_,
        out_row_sources_buf_,
        use_average_block_sizes)
-    , quiet(quiet_)
 {
 }

@ -48,22 +49,7 @@ void MergingSortedTransform::onNewInput()

 void MergingSortedTransform::onFinish()
 {
-    if (quiet)
-        return;
-
-    const auto & merged_data = algorithm.getMergedData();
-
-    auto log = getLogger("MergingSortedTransform");
-
-    double seconds = total_stopwatch.elapsedSeconds();
-
-    if (seconds == 0.0)
-        LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in 0 sec.", merged_data.totalChunks(), merged_data.totalMergedRows());
-    else
-        LOG_DEBUG(log, "Merge sorted {} blocks, {} rows in {} sec., {} rows/sec., {}/sec",
-            merged_data.totalChunks(), merged_data.totalMergedRows(), seconds,
-            merged_data.totalMergedRows() / seconds,
-            ReadableSize(merged_data.totalAllocatedBytes() / seconds));
+    logMergedStats(ProfileEvents::MergingSortedMilliseconds, "Merged sorted", getLogger("MergingSortedTransform"));
 }

 }
--- a/src/Processors/Merges/MergingSortedTransform.h
+++ b/src/Processors/Merges/MergingSortedTransform.h
@ -21,7 +21,6 @@ public:
        UInt64 limit_ = 0,
        bool always_read_till_end_ = false,
        WriteBuffer * out_row_sources_buf_ = nullptr,
-        bool quiet_ = false,
        bool use_average_block_sizes = false,
        bool have_all_inputs_ = true);

@ -30,9 +29,6 @@ public:
 protected:
    void onNewInput() override;
    void onFinish() override;
-
-private:
-    bool quiet = false;
 };

 }
--- a/src/Processors/Merges/ReplacingSortedTransform.h
+++ b/src/Processors/Merges/ReplacingSortedTransform.h
@ -3,6 +3,10 @@
 #include <Processors/Merges/IMergingTransform.h>
 #include <Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h>

+namespace ProfileEvents
+{
+    extern const Event ReplacingSortedMilliseconds;
+}

 namespace DB
 {
@ -38,6 +42,11 @@ public:
    }

    String getName() const override { return "ReplacingSorted"; }
+
+    void onFinish() override
+    {
+        logMergedStats(ProfileEvents::ReplacingSortedMilliseconds, "Replaced sorted", getLogger("ReplacingSortedTransform"));
+    }
 };

 }
--- a/src/Processors/Merges/SummingSortedTransform.h
+++ b/src/Processors/Merges/SummingSortedTransform.h
@ -3,6 +3,11 @@
 #include <Processors/Merges/IMergingTransform.h>
 #include <Processors/Merges/Algorithms/SummingSortedAlgorithm.h>

+namespace ProfileEvents
+{
+    extern const Event SummingSortedMilliseconds;
+}
+
 namespace DB
 {

@ -33,6 +38,11 @@ public:
    }

    String getName() const override { return "SummingSortedTransform"; }
+
+    void onFinish() override
+    {
+        logMergedStats(ProfileEvents::SummingSortedMilliseconds, "Summed sorted", getLogger("SummingSortedTransform"));
+    }
 };

 }
--- a/src/Processors/Merges/VersionedCollapsingTransform.h
+++ b/src/Processors/Merges/VersionedCollapsingTransform.h
@ -3,6 +3,10 @@
 #include <Processors/Merges/IMergingTransform.h>
 #include <Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h>

+namespace ProfileEvents
+{
+    extern const Event VersionedCollapsingSortedMilliseconds;
+}

 namespace DB
 {
@ -33,6 +37,11 @@ public:
    }

    String getName() const override { return "VersionedCollapsingTransform"; }
+
+    void onFinish() override
+    {
+        logMergedStats(ProfileEvents::VersionedCollapsingSortedMilliseconds, "Versioned collapsed sorted", getLogger("VersionedCollapsingTransform"));
+    }
 };

 }
--- a/src/Processors/Sources/PostgreSQLSource.cpp
+++ b/src/Processors/Sources/PostgreSQLSource.cpp
@ -35,9 +35,9 @@ PostgreSQLSource<T>::PostgreSQLSource(
    const Block & sample_block,
    UInt64 max_block_size_)
    : ISource(sample_block.cloneEmpty())
-    , query_str(query_str_)
    , max_block_size(max_block_size_)
    , connection_holder(std::move(connection_holder_))
+    , query_str(query_str_)
 {
    init(sample_block);
 }
@ -51,10 +51,10 @@ PostgreSQLSource<T>::PostgreSQLSource(
    UInt64 max_block_size_,
    bool auto_commit_)
    : ISource(sample_block.cloneEmpty())
-    , query_str(query_str_)
-    , tx(std::move(tx_))
    , max_block_size(max_block_size_)
    , auto_commit(auto_commit_)
+    , query_str(query_str_)
+    , tx(std::move(tx_))
 {
    init(sample_block);
 }
@ -204,15 +204,15 @@ PostgreSQLSource<T>::~PostgreSQLSource()
                  */
                stream->close();
            }
-
-            stream.reset();
-            tx.reset();
        }
        catch (...)
        {
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }

+        stream.reset();
+        tx.reset();
+
        if (connection_holder)
            connection_holder->setBroken();
    }
--- a/src/Processors/Sources/PostgreSQLSource.h
+++ b/src/Processors/Sources/PostgreSQLSource.h
@ -38,14 +38,12 @@ protected:
        UInt64 max_block_size_,
        bool auto_commit_);

-    String query_str;
-    std::shared_ptr<T> tx;
-    std::unique_ptr<pqxx::stream_from> stream;
-
    Status prepare() override;

-    void onStart();
    Chunk generate() override;
+
+    void onStart();
+
    void onFinish();

 private:
@ -61,6 +59,12 @@ private:
    postgres::ConnectionHolderPtr connection_holder;

    std::unordered_map<size_t, PostgreSQLArrayInfo> array_info;
+
+protected:
+    String query_str;
+    /// tx and stream must be destroyed before connection_holder.
+    std::shared_ptr<T> tx;
+    std::unique_ptr<pqxx::stream_from> stream;
 };


--- a/src/Processors/Transforms/ColumnGathererTransform.cpp
+++ b/src/Processors/Transforms/ColumnGathererTransform.cpp
@ -1,11 +1,15 @@
 #include <Processors/Transforms/ColumnGathererTransform.h>
+#include <Common/ProfileEvents.h>
 #include <Common/logger_useful.h>
 #include <Common/typeid_cast.h>
 #include <Common/formatReadable.h>
 #include <Columns/ColumnSparse.h>
 #include <IO/WriteHelpers.h>
-#include <iomanip>

+namespace ProfileEvents
+{
+    extern const Event GatheringColumnMilliseconds;
+}

 namespace DB
 {
@ -33,6 +37,13 @@ ColumnGathererStream::ColumnGathererStream(
        throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather");
 }

+void ColumnGathererStream::updateStats(const IColumn & column)
+{
+    merged_rows += column.size();
+    merged_bytes += column.allocatedBytes();
+    ++merged_blocks;
+}
+
 void ColumnGathererStream::initialize(Inputs inputs)
 {
    Columns source_columns;
@ -82,7 +93,9 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
        {
            res.addColumn(source_to_fully_copy->column);
        }
-        merged_rows += source_to_fully_copy->size;
+
+        updateStats(*source_to_fully_copy->column);
+
        source_to_fully_copy->pos = source_to_fully_copy->size;
        source_to_fully_copy = nullptr;
        return Status(std::move(res));
@ -96,8 +109,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
        {
            next_required_source = 0;
            Chunk res;
-            merged_rows += sources.front().column->size();
-            merged_bytes += sources.front().column->allocatedBytes();
+            updateStats(*sources.front().column);
            res.addColumn(std::move(sources.front().column));
            sources.front().pos = sources.front().size = 0;
            return Status(std::move(res));
@ -123,8 +135,8 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
    if (source_to_fully_copy && result_column->empty())
    {
        Chunk res;
-        merged_rows += source_to_fully_copy->column->size();
-        merged_bytes += source_to_fully_copy->column->allocatedBytes();
+        updateStats(*source_to_fully_copy->column);
+
        if (result_column->hasDynamicStructure())
        {
            auto col = result_column->cloneEmpty();
@ -140,13 +152,13 @@ IMergingAlgorithm::Status ColumnGathererStream::merge()
        return Status(std::move(res));
    }

-    auto col = result_column->cloneEmpty();
-    result_column.swap(col);
+    auto return_column = result_column->cloneEmpty();
+    result_column.swap(return_column);

    Chunk res;
-    merged_rows += col->size();
-    merged_bytes += col->allocatedBytes();
-    res.addColumn(std::move(col));
+    updateStats(*return_column);
+
+    res.addColumn(std::move(return_column));
    return Status(std::move(res), row_sources_buf.eof() && !source_to_fully_copy);
 }

@ -185,31 +197,10 @@ ColumnGathererTransform::ColumnGathererTransform(
            toString(header.columns()));
 }

-void ColumnGathererTransform::work()
-{
-    Stopwatch stopwatch;
-    IMergingTransform<ColumnGathererStream>::work();
-    elapsed_ns += stopwatch.elapsedNanoseconds();
-}
-
 void ColumnGathererTransform::onFinish()
 {
-    auto merged_rows = algorithm.getMergedRows();
-    auto merged_bytes = algorithm.getMergedRows();
-    /// Don't print info for small parts (< 10M rows)
-    if (merged_rows < 10000000)
-        return;
-
-    double seconds = static_cast<double>(elapsed_ns) / 1000000000ULL;
    const auto & column_name = getOutputPort().getHeader().getByPosition(0).name;
-
-    if (seconds == 0.0)
-        LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in 0 sec.",
-            column_name, static_cast<double>(merged_bytes) / merged_rows);
-    else
-        LOG_DEBUG(log, "Gathered column {} ({} bytes/elem.) in {} sec., {} rows/sec., {}/sec.",
-            column_name, static_cast<double>(merged_bytes) / merged_rows, seconds,
-            merged_rows / seconds, ReadableSize(merged_bytes / seconds));
+    logMergedStats(ProfileEvents::GatheringColumnMilliseconds, fmt::format("Gathered column {}", column_name), log);
 }

 }
--- a/src/Processors/Transforms/ColumnGathererTransform.h
+++ b/src/Processors/Transforms/ColumnGathererTransform.h
@ -72,10 +72,11 @@ public:
    template <typename Column>
    void gather(Column & column_res);

-    UInt64 getMergedRows() const { return merged_rows; }
-    UInt64 getMergedBytes() const { return merged_bytes; }
+    MergedStats getMergedStats() const override { return {.bytes = merged_bytes, .rows = merged_rows, .blocks = merged_blocks}; }

 private:
+    void updateStats(const IColumn & column);
+
    /// Cache required fields
    struct Source
    {
@ -105,6 +106,7 @@ private:
    ssize_t next_required_source = -1;
    UInt64 merged_rows = 0;
    UInt64 merged_bytes = 0;
+    UInt64 merged_blocks = 0;
 };

 class ColumnGathererTransform final : public IMergingTransform<ColumnGathererStream>
@ -120,12 +122,8 @@ public:

    String getName() const override { return "ColumnGathererTransform"; }

-    void work() override;
-
 protected:
    void onFinish() override;
-    UInt64 elapsed_ns = 0;
-
    LoggerPtr log;
 };

--- a/src/Processors/Transforms/MergeJoinTransform.cpp
+++ b/src/Processors/Transforms/MergeJoinTransform.cpp
@ -511,6 +511,16 @@ void MergeJoinAlgorithm::logElapsed(double seconds)
        stat.max_blocks_loaded);
 }

+IMergingAlgorithm::MergedStats MergeJoinAlgorithm::getMergedStats() const
+{
+    return
+    {
+        .bytes = stat.num_bytes[0] + stat.num_bytes[1],
+        .rows = stat.num_rows[0] + stat.num_rows[1],
+        .blocks = stat.num_blocks[0] + stat.num_blocks[1],
+    };
+}
+
 static void prepareChunk(Chunk & chunk)
 {
    if (!chunk)
@ -547,6 +557,7 @@ void MergeJoinAlgorithm::consume(Input & input, size_t source_num)
    {
        stat.num_blocks[source_num] += 1;
        stat.num_rows[source_num] += input.chunk.getNumRows();
+        stat.num_bytes[source_num] += input.chunk.allocatedBytes();
    }

    prepareChunk(input.chunk);
@ -1271,7 +1282,7 @@ MergeJoinTransform::MergeJoinTransform(

 void MergeJoinTransform::onFinish()
 {
-    algorithm.logElapsed(total_stopwatch.elapsedSeconds());
+    algorithm.logElapsed(static_cast<double>(merging_elapsed_ns) / 1000000000ULL);
 }

 }
--- a/src/Processors/Transforms/MergeJoinTransform.h
+++ b/src/Processors/Transforms/MergeJoinTransform.h
@ -245,6 +245,8 @@ public:
    void setAsofInequality(ASOFJoinInequality asof_inequality_);

    void logElapsed(double seconds);
+    MergedStats getMergedStats() const override;
+
 private:
    std::optional<Status> handleAnyJoinState();
    Status anyJoin();
@ -280,6 +282,7 @@ private:
    {
        size_t num_blocks[2] = {0, 0};
        size_t num_rows[2] = {0, 0};
+        size_t num_bytes[2] = {0, 0};

        size_t max_blocks_loaded = 0;
    };
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@ -185,7 +185,6 @@ void MergeSortingTransform::consume(Chunk chunk)

        if (!external_merging_sorted)
        {
-            bool quiet = false;
            bool have_all_inputs = false;
            bool use_average_block_sizes = false;

@ -199,7 +198,6 @@ void MergeSortingTransform::consume(Chunk chunk)
                    limit,
                    /*always_read_till_end_=*/ false,
                    nullptr,
-                    quiet,
                    use_average_block_sizes,
                    have_all_inputs);

--- a/src/Processors/Transforms/PasteJoinTransform.cpp
+++ b/src/Processors/Transforms/PasteJoinTransform.cpp
@ -58,6 +58,16 @@ static void prepareChunk(Chunk & chunk)
    chunk.setColumns(std::move(columns), num_rows);
 }

+IMergingAlgorithm::MergedStats PasteJoinAlgorithm::getMergedStats() const
+{
+    return
+    {
+        .bytes = stat.num_bytes[0] + stat.num_bytes[1],
+        .rows = stat.num_rows[0] + stat.num_rows[1],
+        .blocks = stat.num_blocks[0] + stat.num_blocks[1],
+    };
+}
+
 void PasteJoinAlgorithm::initialize(Inputs inputs)
 {
    if (inputs.size() != 2)
--- a/src/Processors/Transforms/PasteJoinTransform.h
+++ b/src/Processors/Transforms/PasteJoinTransform.h
@ -35,8 +35,7 @@ public:
    void initialize(Inputs inputs) override;
    void consume(Input & input, size_t source_num) override;
    Status merge() override;
-
-    void logElapsed(double seconds);
+    MergedStats getMergedStats() const override;

 private:
    Chunk createBlockWithDefaults(size_t source_num);
@ -55,6 +54,7 @@ private:
    {
        size_t num_blocks[2] = {0, 0};
        size_t num_rows[2] = {0, 0};
+        size_t num_bytes[2] = {0, 0};

        size_t max_blocks_loaded = 0;
    };
--- a/src/Processors/tests/gtest_full_sorting_join.cpp
+++ b/src/Processors/tests/gtest_full_sorting_join.cpp
@ -208,6 +208,12 @@ Block executePipeline(QueryPipeline && pipeline)
 template <typename T>
 void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected, const Block & block, const std::string & name)
 {
+    if (expected.empty())
+    {
+        ASSERT_TRUE(block.columns() == 0);
+        return;
+    }
+
    const auto * actual = typeid_cast<const ColumnVector<T> *>(block.getByName(name).column.get());
    ASSERT_TRUE(actual) << "unexpected column type: " << block.getByName(name).column->dumpStructure() << "expected: " << typeid(ColumnVector<T>).name();

@ -230,6 +236,12 @@ void assertColumnVectorEq(const typename ColumnVector<T>::Container & expected,
 template <typename T>
 void assertColumnEq(const IColumn & expected, const Block & block, const std::string & name)
 {
+    if (expected.empty())
+    {
+        ASSERT_TRUE(block.columns() == 0);
+        return;
+    }
+
    const ColumnPtr & actual = block.getByName(name).column;
    ASSERT_TRUE(checkColumn<T>(*actual));
    ASSERT_TRUE(checkColumn<T>(expected));
--- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp
+++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp
@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
    EXPECT_EQ(pipe.numOutputPorts(), 3);

    auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);
+        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true);

    pipe.addTransform(std::move(transform));

@ -125,7 +125,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
    EXPECT_EQ(pipe.numOutputPorts(), 3);

    auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);
+        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, true);

    pipe.addTransform(std::move(transform));

--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@ -103,6 +103,7 @@ namespace DB::ErrorCodes
    extern const int SUPPORT_IS_DISABLED;
    extern const int UNSUPPORTED_METHOD;
    extern const int USER_EXPIRED;
+    extern const int NETWORK_ERROR;
 }

 namespace
@ -254,8 +255,8 @@ void TCPHandler::runImpl()
    socket().setSendTimeout(send_timeout);
    socket().setNoDelay(true);

-    in = std::make_shared<ReadBufferFromPocoSocket>(socket(), read_event);
-    out = std::make_shared<WriteBufferFromPocoSocket>(socket(), write_event);
+    in = std::make_shared<ReadBufferFromPocoSocketChunked>(socket(), read_event);
+    out = std::make_shared<WriteBufferFromPocoSocketChunked>(socket(), write_event);

    /// Support for PROXY protocol
    if (parse_proxy_protocol && !receiveProxyHeader())
@ -280,6 +281,48 @@ void TCPHandler::runImpl()
        if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM)
            receiveAddendum();

+        {
+            /// Server side of chunked protocol negotiation.
+            /// Server advertises its protocol capabilities (separate for send and receive channels) by sending
+            /// in its 'Hello' response one of four types - chunked, notchunked, chunked_optional, notchunked_optional.
+            /// Not optional types are strict meaning that server only supports this type, optional means that
+            /// server prefer this type but capable to work in opposite.
+            /// Client selects which type it is going to communicate based on the settings from config or arguments,
+            /// and sends either "chunked" or "notchunked" protocol request in addendum section of handshake.
+            /// Client can detect if server's protocol capabilities are not compatible with client's settings (for example
+            /// server strictly requires chunked protocol but client's settings only allows notchunked protocol) - in such case
+            /// client should interrupt this connection. However if client continues with incompatible protocol type request, server
+            /// will send appropriate exception and disconnect client.
+
+            auto is_chunked = [](const String & chunked_srv_str, const String & chunked_cl_str, const String & direction)
+            {
+                bool chunked_srv = chunked_srv_str.starts_with("chunked");
+                bool optional_srv = chunked_srv_str.ends_with("_optional");
+                bool chunked_cl = chunked_cl_str.starts_with("chunked");
+
+                if (optional_srv)
+                    return chunked_cl;
+
+                if (chunked_cl != chunked_srv)
+                    throw NetException(
+                        ErrorCodes::NETWORK_ERROR,
+                        "Incompatible protocol: {} is {}, client requested {}",
+                        direction,
+                        chunked_srv ? "chunked" : "notchunked",
+                        chunked_cl ? "chunked" : "notchunked");
+
+                return chunked_srv;
+            };
+
+            bool out_chunked = is_chunked(server.config().getString("proto_caps.send", "notchunked"), proto_recv_chunked_cl, "send");
+            bool in_chunked = is_chunked(server.config().getString("proto_caps.recv", "notchunked"), proto_send_chunked_cl, "recv");
+
+            if (out_chunked)
+                out->enableChunked();
+            if (in_chunked)
+                in->enableChunked();
+        }
+
        if (!is_interserver_mode)
        {
            /// If session created, then settings in session context has been updated.
@ -321,7 +364,7 @@ void TCPHandler::runImpl()
        {
            Stopwatch idle_time;
            UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000;
-            while (tcp_server.isOpen() && !server.isCancelled() && !static_cast<ReadBufferFromPocoSocket &>(*in).poll(timeout_ms))
+            while (tcp_server.isOpen() && !server.isCancelled() && !in->poll(timeout_ms))
            {
                if (idle_time.elapsedSeconds() > idle_connection_timeout)
                {
@ -796,7 +839,7 @@ bool TCPHandler::readDataNext()
    /// We are waiting for a packet from the client. Thus, every `POLL_INTERVAL` seconds check whether we need to shut down.
    while (true)
    {
-        if (static_cast<ReadBufferFromPocoSocket &>(*in).poll(timeout_us))
+        if (in->poll(timeout_us))
        {
            /// If client disconnected.
            if (in->eof())
@ -1186,6 +1229,8 @@ void TCPHandler::processTablesStatusRequest()
    }

    response.write(*out, client_tcp_protocol_version);
+
+    out->finishChunk();
 }

 void TCPHandler::receiveUnexpectedTablesStatusRequest()
@ -1206,6 +1251,8 @@ void TCPHandler::sendPartUUIDs()

        writeVarUInt(Protocol::Server::PartUUIDs, *out);
        writeVectorBinary(uuids, *out);
+
+        out->finishChunk();
        out->next();
    }
 }
@ -1214,6 +1261,8 @@ void TCPHandler::sendPartUUIDs()
 void TCPHandler::sendReadTaskRequestAssumeLocked()
 {
    writeVarUInt(Protocol::Server::ReadTaskRequest, *out);
+
+    out->finishChunk();
    out->next();
 }

@ -1222,6 +1271,8 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges
 {
    writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out);
    announcement.serialize(*out);
+
+    out->finishChunk();
    out->next();
 }

@ -1230,6 +1281,8 @@ void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest re
 {
    writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out);
    request.serialize(*out);
+
+    out->finishChunk();
    out->next();
 }

@ -1238,6 +1291,8 @@ void TCPHandler::sendProfileInfo(const ProfileInfo & info)
 {
    writeVarUInt(Protocol::Server::ProfileInfo, *out);
    info.write(*out, client_tcp_protocol_version);
+
+    out->finishChunk();
    out->next();
 }

@ -1253,6 +1308,8 @@ void TCPHandler::sendTotals(const Block & totals)

        state.block_out->write(totals);
        state.maybe_compressed_out->next();
+
+        out->finishChunk();
        out->next();
    }
 }
@ -1269,6 +1326,8 @@ void TCPHandler::sendExtremes(const Block & extremes)

        state.block_out->write(extremes);
        state.maybe_compressed_out->next();
+
+        out->finishChunk();
        out->next();
    }
 }
@ -1286,6 +1345,8 @@ void TCPHandler::sendProfileEvents()
        writeStringBinary("", *out);

        state.profile_events_block_out->write(block);
+
+        out->finishChunk();
        out->next();

        auto elapsed_milliseconds = stopwatch.elapsedMilliseconds();
@ -1323,6 +1384,8 @@ void TCPHandler::sendTimezone()
    LOG_DEBUG(log, "TCPHandler::sendTimezone(): {}", tz);
    writeVarUInt(Protocol::Server::TimezoneUpdate, *out);
    writeStringBinary(tz, *out);
+
+    out->finishChunk();
    out->next();
 }

@ -1583,6 +1646,12 @@ void TCPHandler::receiveAddendum()

    if (!is_interserver_mode)
        session->setQuotaClientKey(quota_key);
+
+    if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS)
+    {
+        readStringBinary(proto_send_chunked_cl, *in);
+        readStringBinary(proto_recv_chunked_cl, *in);
+    }
 }


@ -1616,6 +1685,11 @@ void TCPHandler::sendHello()
        writeStringBinary(server_display_name, *out);
    if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH)
        writeVarUInt(VERSION_PATCH, *out);
+    if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS)
+    {
+        writeStringBinary(server.config().getString("proto_caps.send", "notchunked"), *out);
+        writeStringBinary(server.config().getString("proto_caps.recv", "notchunked"), *out);
+    }
    if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES)
    {
        auto rules = server.context()->getAccessControl().getPasswordComplexityRules();
@ -1668,6 +1742,7 @@ bool TCPHandler::receivePacket()

        case Protocol::Client::Ping:
            writeVarUInt(Protocol::Server::Pong, *out);
+            out->finishChunk();
            out->next();
            return false;

@ -2197,7 +2272,7 @@ QueryState::CancellationStatus TCPHandler::getQueryCancellationStatus()
    after_check_cancelled.restart();

    /// During request execution the only packet that can come from the client is stopping the query.
-    if (static_cast<ReadBufferFromPocoSocket &>(*in).poll(0))
+    if (in->poll(0))
    {
        if (in->eof())
        {
@ -2248,19 +2323,33 @@ void TCPHandler::sendData(const Block & block)
        }

        writeVarUInt(Protocol::Server::Data, *out);
-        /// Send external table name (empty name is the main table)
-        writeStringBinary("", *out);

        /// For testing hedged requests
        if (block.rows() > 0 && query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds())
        {
+            /// This strange sequence is needed in case of chunked protocol is enabled, in order for client not to
+            /// hang on receiving of at least packet type - chunk will not be processed unless either chunk footer
+            /// or chunk continuation header is received - first 'next' is sending starting chunk containing packet type
+            /// and second 'next' is sending chunk continuation header.
+            out->next();
+            /// Send external table name (empty name is the main table)
+            writeStringBinary("", *out);
            out->next();
            std::chrono::milliseconds ms(query_context->getSettingsRef().sleep_in_send_data_ms.totalMilliseconds());
            std::this_thread::sleep_for(ms);
        }
+        else
+        {
+            /// Send external table name (empty name is the main table)
+            writeStringBinary("", *out);
+        }

        state.block_out->write(block);
-        state.maybe_compressed_out->next();
+
+        if (state.maybe_compressed_out != out)
+            state.maybe_compressed_out->next();
+
+        out->finishChunk();
        out->next();
    }
    catch (...)
@ -2296,6 +2385,8 @@ void TCPHandler::sendLogData(const Block & block)
    writeStringBinary("", *out);

    state.logs_block_out->write(block);
+
+    out->finishChunk();
    out->next();
 }

@ -2307,6 +2398,7 @@ void TCPHandler::sendTableColumns(const ColumnsDescription & columns)
    writeStringBinary("", *out);
    writeStringBinary(columns.toString(), *out);

+    out->finishChunk();
    out->next();
 }

@ -2316,6 +2408,8 @@ void TCPHandler::sendException(const Exception & e, bool with_stack_trace)

    writeVarUInt(Protocol::Server::Exception, *out);
    writeException(e, *out, with_stack_trace);
+
+    out->finishChunk();
    out->next();
 }

@ -2326,6 +2420,8 @@ void TCPHandler::sendEndOfStream()
    state.io.setAllDataSent();

    writeVarUInt(Protocol::Server::EndOfStream, *out);
+
+    out->finishChunk();
    out->next();
 }

@ -2344,6 +2440,8 @@ void TCPHandler::sendProgress()
    increment.elapsed_ns = current_elapsed_ns - state.prev_elapsed_ns;
    state.prev_elapsed_ns = current_elapsed_ns;
    increment.write(*out, client_tcp_protocol_version);
+
+    out->finishChunk();
    out->next();
 }

--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@ -18,6 +18,8 @@
 #include <Interpreters/ProfileEventsExt.h>
 #include <Formats/NativeReader.h>
 #include <Formats/NativeWriter.h>
+#include <IO/ReadBufferFromPocoSocketChunked.h>
+#include <IO/WriteBufferFromPocoSocketChunked.h>

 #include "Core/Types.h"
 #include "IServer.h"
@ -186,6 +188,8 @@ private:
    UInt64 client_version_minor = 0;
    UInt64 client_version_patch = 0;
    UInt32 client_tcp_protocol_version = 0;
+    String proto_send_chunked_cl = "notchunked";
+    String proto_recv_chunked_cl = "notchunked";
    String quota_key;

    /// Connection settings, which are extracted from a context.
@ -204,8 +208,8 @@ private:
    ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::NO_QUERY;

    /// Streams for reading/writing from/to client connection socket.
-    std::shared_ptr<ReadBuffer> in;
-    std::shared_ptr<WriteBuffer> out;
+    std::shared_ptr<ReadBufferFromPocoSocketChunked> in;
+    std::shared_ptr<WriteBufferFromPocoSocketChunked> out;

    ProfileEvents::Event read_event;
    ProfileEvents::Event write_event;
--- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp
+++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp
@ -273,6 +273,8 @@ ConnectionPoolWithFailoverPtr DistributedAsyncInsertDirectoryQueue::createPool(c
            address.default_database,
            address.user,
            address.password,
+            address.proto_send_chunked,
+            address.proto_recv_chunked,
            address.quota_key,
            address.cluster,
            address.cluster_secret,
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@ -1662,11 +1662,9 @@ void IMergeTreeDataPart::loadColumns(bool require)
 }


-/// Project part / part with project parts / compact part doesn't support LWD.
 bool IMergeTreeDataPart::supportLightweightDeleteMutate() const
 {
-    return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact) &&
-        parent_part == nullptr && projection_parts.empty();
+    return (part_type == MergeTreeDataPartType::Wide || part_type == MergeTreeDataPartType::Compact);
 }

 bool IMergeTreeDataPart::hasLightweightDelete() const
--- a/src/Storages/MergeTree/MergeProgress.h
+++ b/src/Storages/MergeTree/MergeProgress.h
@ -8,10 +8,10 @@

 namespace ProfileEvents
 {
-    extern const Event MergesTimeMilliseconds;
    extern const Event MergedUncompressedBytes;
    extern const Event MergedRows;
-    extern const Event Merge;
+    extern const Event MutatedRows;
+    extern const Event MutatedUncompressedBytes;
 }

 namespace DB
@ -63,18 +63,17 @@ public:
    void updateWatch()
    {
        UInt64 watch_curr_elapsed = merge_list_element_ptr->watch.elapsed();
-        ProfileEvents::increment(ProfileEvents::MergesTimeMilliseconds, (watch_curr_elapsed - watch_prev_elapsed) / 1000000);
        watch_prev_elapsed = watch_curr_elapsed;
    }

-    void operator() (const Progress & value)
+    void operator()(const Progress & value)
    {
-        ProfileEvents::increment(ProfileEvents::MergedUncompressedBytes, value.read_bytes);
-        if (stage.is_first)
-        {
-            ProfileEvents::increment(ProfileEvents::MergedRows, value.read_rows);
-            ProfileEvents::increment(ProfileEvents::Merge);
-        }
+        if (merge_list_element_ptr->is_mutation)
+            updateProfileEvents(value, ProfileEvents::MutatedRows, ProfileEvents::MutatedUncompressedBytes);
+        else
+            updateProfileEvents(value, ProfileEvents::MergedRows, ProfileEvents::MergedUncompressedBytes);
+
+
        updateWatch();

        merge_list_element_ptr->bytes_read_uncompressed += value.read_bytes;
@ -90,6 +89,14 @@ public:
                std::memory_order_relaxed);
        }
    }
+
+private:
+    void updateProfileEvents(const Progress & value, ProfileEvents::Event rows_event, ProfileEvents::Event bytes_event) const
+    {
+        ProfileEvents::increment(bytes_event, value.read_bytes);
+        if (stage.is_first)
+            ProfileEvents::increment(rows_event, value.read_rows);
+    }
 };

 }
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@ -8,6 +8,7 @@
 #include <Common/logger_useful.h>
 #include <Common/ActionBlocker.h>
 #include <Core/Settings.h>
+#include <Common/ProfileEvents.h>
 #include <Processors/Transforms/CheckSortedTransform.h>
 #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
 #include <Compression/CompressedWriteBuffer.h>
@ -39,6 +40,18 @@
 #include <Interpreters/MergeTreeTransaction.h>
 #include <QueryPipeline/QueryPipelineBuilder.h>

+namespace ProfileEvents
+{
+    extern const Event Merge;
+    extern const Event MergedColumns;
+    extern const Event GatheredColumns;
+    extern const Event MergeTotalMilliseconds;
+    extern const Event MergeExecuteMilliseconds;
+    extern const Event MergeHorizontalStageExecuteMilliseconds;
+    extern const Event MergeVerticalStageExecuteMilliseconds;
+    extern const Event MergeProjectionStageExecuteMilliseconds;
+}
+
 namespace DB
 {

@ -169,6 +182,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColu

 bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
 {
+    ProfileEvents::increment(ProfileEvents::Merge);
+
    String local_tmp_prefix;
    if (global_ctx->need_prefix)
    {
@ -446,6 +461,13 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str

 MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::getContextForNextStage()
 {
+    /// Do not increment for projection stage because time is already accounted in main task.
+    if (global_ctx->parent_part == nullptr)
+    {
+        ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+        ProfileEvents::increment(ProfileEvents::MergeHorizontalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+    }
+
    auto new_ctx = std::make_shared<VerticalMergeRuntimeContext>();

    new_ctx->rows_sources_write_buf = std::move(ctx->rows_sources_write_buf);
@ -463,8 +485,14 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g

 MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNextStage()
 {
-    auto new_ctx = std::make_shared<MergeProjectionsRuntimeContext>();
+    /// Do not increment for projection stage because time is already accounted in main task.
+    if (global_ctx->parent_part == nullptr)
+    {
+        ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+        ProfileEvents::increment(ProfileEvents::MergeVerticalStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+    }

+    auto new_ctx = std::make_shared<MergeProjectionsRuntimeContext>();
    new_ctx->need_sync = std::move(ctx->need_sync);

    ctx.reset();
@ -474,9 +502,14 @@ MergeTask::StageRuntimeContextPtr MergeTask::VerticalMergeStage::getContextForNe

 bool MergeTask::ExecuteAndFinalizeHorizontalPart::execute()
 {
-    assert(subtasks_iterator != subtasks.end());
-    if ((this->**subtasks_iterator)())
-        return true;
+    chassert(subtasks_iterator != subtasks.end());
+
+    Stopwatch watch;
+    bool res = (this->**subtasks_iterator)();
+    ctx->elapsed_execute_ns += watch.elapsedNanoseconds();
+
+    if (res)
+        return res;

    /// Move to the next subtask in an array of subtasks
    ++subtasks_iterator;
@ -534,7 +567,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::executeImpl()

 bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const
 {
-     /// No need to execute this part if it is horizontal merge.
+    /// No need to execute this part if it is horizontal merge.
    if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical)
        return false;

@ -784,6 +817,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c

    /// Print overall profiling info. NOTE: it may duplicates previous messages
    {
+        ProfileEvents::increment(ProfileEvents::MergedColumns, global_ctx->merging_columns.size());
+        ProfileEvents::increment(ProfileEvents::GatheredColumns, global_ctx->gathering_columns.size());
+
        double elapsed_seconds = global_ctx->merge_list_element_ptr->watch.elapsedSeconds();
        LOG_DEBUG(ctx->log,
            "Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.",
@ -906,12 +942,29 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const
    return false;
 }

+MergeTask::StageRuntimeContextPtr MergeTask::MergeProjectionsStage::getContextForNextStage()
+{
+    /// Do not increment for projection stage because time is already accounted in main task.
+    /// The projection stage has its own empty projection stage which may add a drift of several milliseconds.
+    if (global_ctx->parent_part == nullptr)
+    {
+        ProfileEvents::increment(ProfileEvents::MergeExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+        ProfileEvents::increment(ProfileEvents::MergeProjectionStageExecuteMilliseconds, ctx->elapsed_execute_ns / 1000000UL);
+    }
+
+    return nullptr;
+}

 bool MergeTask::VerticalMergeStage::execute()
 {
-    assert(subtasks_iterator != subtasks.end());
-    if ((this->**subtasks_iterator)())
-        return true;
+    chassert(subtasks_iterator != subtasks.end());
+
+    Stopwatch watch;
+    bool res = (this->**subtasks_iterator)();
+    ctx->elapsed_execute_ns += watch.elapsedNanoseconds();
+
+    if (res)
+        return res;

    /// Move to the next subtask in an array of subtasks
    ++subtasks_iterator;
@ -920,9 +973,14 @@ bool MergeTask::VerticalMergeStage::execute()

 bool MergeTask::MergeProjectionsStage::execute()
 {
-    assert(subtasks_iterator != subtasks.end());
-    if ((this->**subtasks_iterator)())
-        return true;
+    chassert(subtasks_iterator != subtasks.end());
+
+    Stopwatch watch;
+    bool res = (this->**subtasks_iterator)();
+    ctx->elapsed_execute_ns += watch.elapsedNanoseconds();
+
+    if (res)
+        return res;

    /// Move to the next subtask in an array of subtasks
    ++subtasks_iterator;
@ -969,12 +1027,26 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const

 bool MergeTask::execute()
 {
-    assert(stages_iterator != stages.end());
-    if ((*stages_iterator)->execute())
+    chassert(stages_iterator != stages.end());
+    const auto & current_stage = *stages_iterator;
+
+    if (current_stage->execute())
        return true;

-    /// Stage is finished, need initialize context for the next stage
-    auto next_stage_context = (*stages_iterator)->getContextForNextStage();
+    /// Stage is finished, need to initialize context for the next stage and update profile events.
+
+    UInt64 current_elapsed_ms = global_ctx->merge_list_element_ptr->watch.elapsedMilliseconds();
+    UInt64 stage_elapsed_ms = current_elapsed_ms - global_ctx->prev_elapsed_ms;
+    global_ctx->prev_elapsed_ms = current_elapsed_ms;
+
+    auto next_stage_context = current_stage->getContextForNextStage();
+
+    /// Do not increment for projection stage because time is already accounted in main task.
+    if (global_ctx->parent_part == nullptr)
+    {
+        ProfileEvents::increment(current_stage->getTotalTimeProfileEvent(), stage_elapsed_ms);
+        ProfileEvents::increment(ProfileEvents::MergeTotalMilliseconds, stage_elapsed_ms);
+    }

    /// Move to the next stage in an array of stages
    ++stages_iterator;
@ -1099,7 +1171,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
                /* limit_= */0,
                /* always_read_till_end_= */false,
                ctx->rows_sources_write_buf.get(),
-                true,
                ctx->blocks_are_granules_size);
            break;

--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@ -3,6 +3,7 @@
 #include <list>
 #include <memory>

+#include <Common/ProfileEvents.h>
 #include <Common/filesystemHelpers.h>

 #include <Compression/CompressedReadBuffer.h>
@ -26,6 +27,12 @@
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MergeTree/MergeTreeIndices.h>

+namespace ProfileEvents
+{
+    extern const Event MergeHorizontalStageTotalMilliseconds;
+    extern const Event MergeVerticalStageTotalMilliseconds;
+    extern const Event MergeProjectionStageTotalMilliseconds;
+}

 namespace DB
 {
@ -134,6 +141,7 @@ private:
    {
        virtual void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) = 0;
        virtual StageRuntimeContextPtr getContextForNextStage() = 0;
+        virtual ProfileEvents::Event getTotalTimeProfileEvent() const = 0;
        virtual bool execute() = 0;
        virtual ~IStage() = default;
    };
@ -195,6 +203,7 @@ private:
        bool need_prefix;

        scope_guard temporary_directory_lock;
+        UInt64 prev_elapsed_ms{0};
    };

    using GlobalRuntimeContextPtr = std::shared_ptr<GlobalRuntimeContext>;
@ -233,6 +242,7 @@ private:
        /// Dependencies for next stages
        std::list<DB::NameAndTypePair>::const_iterator it_name_and_type;
        bool need_sync{false};
+        UInt64 elapsed_execute_ns{0};
    };

    using ExecuteAndFinalizeHorizontalPartRuntimeContextPtr = std::shared_ptr<ExecuteAndFinalizeHorizontalPartRuntimeContext>;
@ -256,7 +266,6 @@ private:

        ExecuteAndFinalizeHorizontalPartSubtasks::const_iterator subtasks_iterator = subtasks.begin();

-
        MergeAlgorithm chooseMergeAlgorithm() const;
        void createMergedStream();
        void extractMergingAndGatheringColumns() const;
@ -268,6 +277,7 @@ private:
        }

        StageRuntimeContextPtr getContextForNextStage() override;
+        ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeHorizontalStageTotalMilliseconds; }

        ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx;
        GlobalRuntimeContextPtr global_ctx;
@ -307,6 +317,7 @@ private:
        QueryPipeline column_parts_pipeline;
        std::unique_ptr<PullingPipelineExecutor> executor;
        std::unique_ptr<CompressedReadBufferFromFile> rows_sources_read_buf{nullptr};
+        UInt64 elapsed_execute_ns{0};
    };

    using VerticalMergeRuntimeContextPtr = std::shared_ptr<VerticalMergeRuntimeContext>;
@ -321,6 +332,7 @@ private:
            global_ctx = static_pointer_cast<GlobalRuntimeContext>(global);
        }
        StageRuntimeContextPtr getContextForNextStage() override;
+        ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeVerticalStageTotalMilliseconds; }

        bool prepareVerticalMergeForAllColumns() const;
        bool executeVerticalMergeForAllColumns() const;
@ -361,6 +373,7 @@ private:
        MergeTasks::iterator projections_iterator;

        LoggerPtr log{getLogger("MergeTask::MergeProjectionsStage")};
+        UInt64 elapsed_execute_ns{0};
    };

    using MergeProjectionsRuntimeContextPtr = std::shared_ptr<MergeProjectionsRuntimeContext>;
@ -368,12 +381,15 @@ private:
    struct MergeProjectionsStage : public IStage
    {
        bool execute() override;
+
        void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override
        {
            ctx = static_pointer_cast<MergeProjectionsRuntimeContext>(local);
            global_ctx = static_pointer_cast<GlobalRuntimeContext>(global);
        }
-        StageRuntimeContextPtr getContextForNextStage() override { return nullptr; }
+
+        StageRuntimeContextPtr getContextForNextStage() override;
+        ProfileEvents::Event getTotalTimeProfileEvent() const override { return ProfileEvents::MergeProjectionStageTotalMilliseconds; }

        bool mergeMinMaxIndexAndPrepareProjections() const;
        bool executeProjections() const;
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@ -215,6 +215,7 @@ struct Settings;
    M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \
    /** Projection settings. */ \
    M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \
+    M(LightweightMutationProjectionMode, lightweight_mutation_projection_mode, LightweightMutationProjectionMode::THROW, "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop projections of this table's relevant parts, or rebuild the projections.", 0) \
    M(DeduplicateMergeProjectionMode, deduplicate_merge_projection_mode, DeduplicateMergeProjectionMode::THROW, "Whether to allow create projection for the table with non-classic MergeTree, if allowed, what is the action when merge, drop or rebuild.", 0) \

 #define MAKE_OBSOLETE_MERGE_TREE_SETTING(M, TYPE, NAME, DEFAULT) \
--- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
@ -254,6 +254,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit
            LOG_ERROR(log, "{}. Data after mutation is not byte-identical to data on another replicas. "
                           "We will download merged part from replica to force byte-identical result.", getCurrentExceptionMessage(false));

+            mutate_task->updateProfileEvents();
            write_part_log(ExecutionStatus::fromCurrentException("", true));

            if (storage.getSettings()->detach_not_byte_identical_parts)
@ -281,6 +282,7 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrit
         */
    finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); };
    ProfileEvents::increment(ProfileEvents::ReplicatedPartMutations);
+    mutate_task->updateProfileEvents();
    write_part_log({});

    return true;
--- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp
@ -102,6 +102,7 @@ bool MutatePlainMergeTreeTask::executeStep()
                transaction.commit();

                storage.updateMutationEntriesErrors(future_part, true, "");
+                mutate_task->updateProfileEvents();
                write_part_log({});

                state = State::NEED_FINISH;
@ -114,6 +115,7 @@ bool MutatePlainMergeTreeTask::executeStep()
                PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false);
                LOG_ERROR(getLogger("MutatePlainMergeTreeTask"), exception_message);
                storage.updateMutationEntriesErrors(future_part, false, exception_message.text);
+                mutate_task->updateProfileEvents();
                write_part_log(ExecutionStatus::fromCurrentException("", true));
                tryLogCurrentException(__PRETTY_FUNCTION__);
                return false;
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@ -38,7 +38,13 @@

 namespace ProfileEvents
 {
-extern const Event MutateTaskProjectionsCalculationMicroseconds;
+    extern const Event MutationTotalParts;
+    extern const Event MutationUntouchedParts;
+    extern const Event MutationTotalMilliseconds;
+    extern const Event MutationExecuteMilliseconds;
+    extern const Event MutationAllPartColumns;
+    extern const Event MutationSomePartColumns;
+    extern const Event MutateTaskProjectionsCalculationMicroseconds;
 }

 namespace CurrentMetrics
@ -659,7 +665,7 @@ static NameSet collectFilesToSkip(
    const Block & updated_header,
    const std::set<MergeTreeIndexPtr> & indices_to_recalc,
    const String & mrk_extension,
-    const std::set<ProjectionDescriptionRawPtr> & projections_to_recalc,
+    const std::set<ProjectionDescriptionRawPtr> & projections_to_skip,
    const std::set<ColumnStatisticsPtr> & stats_to_recalc)
 {
    NameSet files_to_skip = source_part->getFileNamesWithoutChecksums();
@ -684,7 +690,7 @@ static NameSet collectFilesToSkip(
        }
    }

-    for (const auto & projection : projections_to_recalc)
+    for (const auto & projection : projections_to_skip)
        files_to_skip.insert(projection->getDirectoryName());

    for (const auto & stat : stats_to_recalc)
@ -1046,6 +1052,7 @@ struct MutationContext

    /// Whether we need to count lightweight delete rows in this mutation
    bool count_lightweight_deleted_rows;
+    UInt64 execute_elapsed_ns = 0;
 };

 using MutationContextPtr = std::shared_ptr<MutationContext>;
@ -1554,6 +1561,10 @@ private:
                removed_projections.insert(command.column_name);
        }

+        bool lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name);
+        bool lightweight_delete_drop = lightweight_delete_mode
+            && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP;
+
        const auto & projections = ctx->metadata_snapshot->getProjections();
        for (const auto & projection : projections)
        {
@ -1561,10 +1572,11 @@ private:
                continue;

            bool need_recalculate =
-                ctx->materialized_projections.contains(projection.name)
+                (ctx->materialized_projections.contains(projection.name)
                || (!is_full_part_storage
                    && ctx->source_part->hasProjection(projection.name)
-                    && !ctx->source_part->hasBrokenProjection(projection.name));
+                    && !ctx->source_part->hasBrokenProjection(projection.name)))
+                && !lightweight_delete_drop;

            if (need_recalculate)
            {
@ -1572,7 +1584,7 @@ private:
            }
            else
            {
-                if (ctx->source_part->checksums.has(projection.getDirectoryName()))
+                if (!lightweight_delete_mode && ctx->source_part->checksums.has(projection.getDirectoryName()))
                    entries_to_hardlink.insert(projection.getDirectoryName());
            }
        }
@ -2017,6 +2029,9 @@ MutateTask::MutateTask(

 bool MutateTask::execute()
 {
+    Stopwatch watch;
+    SCOPE_EXIT({ ctx->execute_elapsed_ns += watch.elapsedNanoseconds(); });
+
    switch (state)
    {
        case State::NEED_PREPARE:
@ -2050,6 +2065,15 @@ bool MutateTask::execute()
    return false;
 }

+void MutateTask::updateProfileEvents() const
+{
+    UInt64 total_elapsed_ms = (*ctx->mutate_entry)->watch.elapsedMilliseconds();
+    UInt64 execute_elapsed_ms = ctx->execute_elapsed_ns / 1000000UL;
+
+    ProfileEvents::increment(ProfileEvents::MutationTotalMilliseconds, total_elapsed_ms);
+    ProfileEvents::increment(ProfileEvents::MutationExecuteMilliseconds, execute_elapsed_ms);
+}
+
 static bool canSkipConversionToNullable(const MergeTreeDataPartPtr & part, const MutationCommand & command)
 {
    if (command.type != MutationCommand::READ_COLUMN)
@ -2112,6 +2136,7 @@ static bool canSkipMutationCommandForPart(const MergeTreeDataPartPtr & part, con

 bool MutateTask::prepare()
 {
+    ProfileEvents::increment(ProfileEvents::MutationTotalParts);
    MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry);

    if (ctx->future_part->parts.size() != 1)
@ -2174,6 +2199,7 @@ bool MutateTask::prepare()
            ctx->temporary_directory_lock = std::move(lock);
        }

+        ProfileEvents::increment(ProfileEvents::MutationUntouchedParts);
        promise.set_value(std::move(part));
        return false;
    }
@ -2198,6 +2224,8 @@ bool MutateTask::prepare()

    ctx->stage_progress = std::make_unique<MergeStageProgress>(1.0);

+    bool lightweight_delete_mode = false;
+
    if (!ctx->for_interpreter.empty())
    {
        /// Always disable filtering in mutations: we want to read and write all rows because for updates we rewrite only some of the
@ -2215,6 +2243,21 @@ bool MutateTask::prepare()
        ctx->mutating_pipeline_builder = ctx->interpreter->execute();
        ctx->updated_header = ctx->interpreter->getUpdatedHeader();
        ctx->progress_callback = MergeProgressCallback((*ctx->mutate_entry)->ptr(), ctx->watch_prev_elapsed, *ctx->stage_progress);
+
+        lightweight_delete_mode = ctx->updated_header.has(RowExistsColumn::name);
+        /// If under the condition of lightweight delete mode with rebuild option, add projections again here as we can only know
+        /// the condition as early as from here.
+        if (lightweight_delete_mode
+            && ctx->data->getSettings()->lightweight_mutation_projection_mode == LightweightMutationProjectionMode::REBUILD)
+        {
+            for (const auto & projection : ctx->metadata_snapshot->getProjections())
+            {
+                if (!ctx->source_part->hasProjection(projection.name))
+                    continue;
+
+                ctx->materialized_projections.insert(projection.name);
+            }
+        }
    }

    auto single_disk_volume = std::make_shared<SingleDiskVolume>("volume_" + ctx->future_part->name, ctx->space_reservation->getDisk(), 0);
@ -2256,7 +2299,7 @@ bool MutateTask::prepare()
    if (ctx->mutating_pipeline_builder.initialized())
        ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies());

-    if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->updated_header.has(RowExistsColumn::name))
+    if (ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && lightweight_delete_mode)
    {
        /// This mutation contains lightweight delete and we need to count the deleted rows,
        /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column
@ -2283,6 +2326,7 @@ bool MutateTask::prepare()
        ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::REMOVE_BLOBS;

        task = std::make_unique<MutateAllPartColumnsTask>(ctx);
+        ProfileEvents::increment(ProfileEvents::MutationAllPartColumns);
    }
    else /// TODO: check that we modify only non-key columns in this case.
    {
@ -2293,10 +2337,30 @@ bool MutateTask::prepare()
            ctx->context,
            ctx->materialized_indices);

-        ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(
-            ctx->source_part,
-            ctx->metadata_snapshot,
-            ctx->materialized_projections);
+        auto lightweight_mutation_projection_mode = ctx->data->getSettings()->lightweight_mutation_projection_mode;
+        bool lightweight_delete_drops_projections =
+            lightweight_mutation_projection_mode == LightweightMutationProjectionMode::DROP
+            || lightweight_mutation_projection_mode == LightweightMutationProjectionMode::THROW;
+
+        std::set<ProjectionDescriptionRawPtr> projections_to_skip_container;
+        auto * projections_to_skip = &projections_to_skip_container;
+
+        bool should_create_projections = !(lightweight_delete_mode && lightweight_delete_drops_projections);
+        /// Under lightweight delete mode, if option is drop, projections_to_recalc should be empty.
+        if (should_create_projections)
+        {
+            ctx->projections_to_recalc = MutationHelpers::getProjectionsToRecalculate(
+                ctx->source_part,
+                ctx->metadata_snapshot,
+                ctx->materialized_projections);
+
+            projections_to_skip = &ctx->projections_to_recalc;
+        }
+        else
+        {
+            for (const auto & projection : ctx->metadata_snapshot->getProjections())
+                projections_to_skip->insert(&projection);
+        }

        ctx->stats_to_recalc = MutationHelpers::getStatisticsToRecalculate(ctx->metadata_snapshot, ctx->materialized_statistics);

@ -2306,7 +2370,7 @@ bool MutateTask::prepare()
            ctx->updated_header,
            ctx->indices_to_recalc,
            ctx->mrk_extension,
-            ctx->projections_to_recalc,
+            *projections_to_skip,
            ctx->stats_to_recalc);

        ctx->files_to_rename = MutationHelpers::collectFilesForRenames(
@ -2322,6 +2386,7 @@ bool MutateTask::prepare()
        ctx->new_data_part->remove_tmp_policy = IMergeTreeDataPart::BlobsRemovalPolicyForTemporaryParts::ASK_KEEPER;

        task = std::make_unique<MutateSomePartColumnsTask>(ctx);
+        ProfileEvents::increment(ProfileEvents::MutationSomePartColumns);
    }

    return true;
--- a/src/Storages/MergeTree/MutateTask.h
+++ b/src/Storages/MergeTree/MutateTask.h
@ -39,6 +39,7 @@ public:
        bool need_prefix_);

    bool execute();
+    void updateProfileEvents() const;

    std::future<MergeTreeData::MutableDataPartPtr> getFuture()
    {
--- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
+++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp
@ -659,7 +659,7 @@ void PostgreSQLReplicationHandler::dropReplicationSlot(pqxx::nontransaction & tx

 void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)
 {
-    std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", publication_name);
+    std::string query_str = fmt::format("DROP PUBLICATION IF EXISTS {}", doubleQuoteString(publication_name));
    tx.exec(query_str);
    LOG_DEBUG(log, "Dropped publication: {}", publication_name);
 }
@ -667,7 +667,7 @@ void PostgreSQLReplicationHandler::dropPublication(pqxx::nontransaction & tx)

 void PostgreSQLReplicationHandler::addTableToPublication(pqxx::nontransaction & ntx, const String & table_name)
 {
-    std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", publication_name, doubleQuoteWithSchema(table_name));
+    std::string query_str = fmt::format("ALTER PUBLICATION {} ADD TABLE ONLY {}", doubleQuoteString(publication_name), doubleQuoteWithSchema(table_name));
    ntx.exec(query_str);
    LOG_TRACE(log, "Added table {} to publication `{}`", doubleQuoteWithSchema(table_name), publication_name);
 }
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@ -16,6 +16,7 @@
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/Operators.h>
+#include <Storages/MergeTree/MergeTreeVirtualColumns.h>


 namespace DB
@ -334,10 +335,17 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
    NameSet required_ttl_columns;
    NameSet updated_ttl_columns;

-    auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set)
+    auto add_dependent_columns = [&updated_columns](const Names & required_columns, auto & to_set, bool is_projection = false)
    {
        for (const auto & dependency : required_columns)
        {
+            /// useful in the case of lightweight delete with wide part and option of rebuild projection
+            if (is_projection && updated_columns.contains(RowExistsColumn::name))
+            {
+                to_set.insert(required_columns.begin(), required_columns.end());
+                return true;
+            }
+
            if (updated_columns.contains(dependency))
            {
                to_set.insert(required_columns.begin(), required_columns.end());
@ -357,7 +365,7 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(
    for (const auto & projection : getProjections())
    {
        if (has_dependency(projection.name, ColumnDependency::PROJECTION))
-            add_dependent_columns(projection.getRequiredColumns(), projections_columns);
+            add_dependent_columns(projection.getRequiredColumns(), projections_columns, true);
    }

    auto add_for_rows_ttl = [&](const auto & expression, auto & to_set)
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@ -5704,7 +5704,8 @@ std::optional<QueryPipeline> StorageReplicatedMergeTree::distributedWriteFromClu
        {
            auto connection = std::make_shared<Connection>(
                node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(),
-                node.user, node.password, SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret,
+                node.user, node.password, node.proto_send_chunked, node.proto_recv_chunked,
+                SSHKey(), /*jwt*/"", node.quota_key, node.cluster, node.cluster_secret,
                "ParallelInsertSelectInititiator",
                node.compression,
                node.secure
--- a/src/Storages/System/StorageSystemContributors.generated.cpp
+++ b/src/Storages/System/StorageSystemContributors.generated.cpp
@ -457,6 +457,7 @@ const char * auto_contributors[] {
    "Gleb-Tretyakov",
    "GoGoWen2021",
    "Gosha Letov",
+    "Graham Campbell",
    "Gregory",
    "Grigorii Sokolik",
    "Grigory",
@ -472,6 +473,7 @@ const char * auto_contributors[] {
    "Habibullah Oladepo",
    "HaiBo Li",
    "Hakob Saghatelyan",
+    "Halersson Paris",
    "Hamoon",
    "Han Fei",
    "Han Shukai",
@ -541,6 +543,7 @@ const char * auto_contributors[] {
    "JackyWoo",
    "Jacob Hayes",
    "Jacob Herrington",
+    "Jacob Reckhard",
    "Jai Jhala",
    "Jake Bamrah",
    "Jake Liu",
@ -661,6 +664,7 @@ const char * auto_contributors[] {
    "LaurieLY",
    "Lee sungju",
    "Lemore",
+    "Lennard Eijsackers",
    "Leonardo Cecchi",
    "Leonardo Maciel",
    "Leonid Krylov",
@ -922,6 +926,7 @@ const char * auto_contributors[] {
    "Pervakov Grigorii",
    "Pervakov Grigory",
    "Peter",
+    "Peter Nguyen",
    "Petr Vasilev",
    "Pham Anh Tuan",
    "Philip Hallstrom",
@ -981,6 +986,7 @@ const char * auto_contributors[] {
    "Ronald Bradford",
    "Rory Crispin",
    "Roy Bellingan",
+    "Ruihang Xia",
    "Ruslan",
    "Ruslan Mardugalliamov",
    "Ruslan Savchenko",
@ -1000,9 +1006,11 @@ const char * auto_contributors[] {
    "Sami Kerola",
    "Samuel Chou",
    "Samuel Colvin",
+    "Samuele Guerrini",
    "San",
    "Sanjam Panda",
    "Sariel",
+    "Sasha Sheikin",
    "Saulius Valatka",
    "Sean Haynes",
    "Sean Lafferty",
@ -1202,6 +1210,7 @@ const char * auto_contributors[] {
    "Vladimir Makarov",
    "Vladimir Mihailenco",
    "Vladimir Smirnov",
+    "Vladimir Varankin",
    "Vladislav Rassokhin",
    "Vladislav Smirnov",
    "Vladislav V",
@ -1275,6 +1284,7 @@ const char * auto_contributors[] {
    "Zhichun Wu",
    "Zhiguo Zhou",
    "Zhipeng",
+    "Zhukova, Maria",
    "Zhuo Qiu",
    "Zijie Lu",
    "Zimu Li",
@ -1502,6 +1512,7 @@ const char * auto_contributors[] {
    "hchen9",
    "hcz",
    "hdhoang",
+    "heguangnan",
    "heleihelei",
    "helifu",
    "hendrik-m",
@ -1572,6 +1583,7 @@ const char * auto_contributors[] {
    "kevinyhzou",
    "kgurjev",
    "khamadiev",
+    "khodyrevyurii",
    "kigerzhang",
    "kirillikoff",
    "kmeaw",
@ -1787,6 +1799,7 @@ const char * auto_contributors[] {
    "ruslandoga",
    "ryzuo",
    "s-kat",
+    "sakulali",
    "sanjam",
    "santaux",
    "santrancisco",
@ -1804,6 +1817,7 @@ const char * auto_contributors[] {
    "shabroo",
    "shangshujie",
    "shedx",
+    "shiyer7474",
    "shuai-xu",
    "shuchaome",
    "shuyang",
@ -1901,6 +1915,7 @@ const char * auto_contributors[] {
    "wzl",
    "xPoSx",
    "xbthink",
+    "xc0derx",
    "xiao",
    "xiaolei565",
    "xiebin",
@ -1964,6 +1979,7 @@ const char * auto_contributors[] {
    "zkun",
    "zlx19950903",
    "zombee0",
+    "zoomxi",
    "zvonand",
    "zvrr",
    "zvvr",
--- a/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp
+++ b/src/Storages/TimeSeries/TimeSeriesDefinitionNormalizer.cpp
@ -227,8 +227,11 @@ void TimeSeriesDefinitionNormalizer::addMissingColumns(ASTCreateQuery & create)
        /// We use Nullable(DateTime64(3)) as the default type of the `min_time` and `max_time` columns.
        /// It's nullable because it allows the aggregation (see aggregate_min_time_and_max_time) work correctly even
        /// for rows in the "tags" table which doesn't have `min_time` and `max_time` (because they have no matching rows in the "data" table).
-        make_new_column(TimeSeriesColumnNames::MinTime, make_nullable(timestamp_type));
-        make_new_column(TimeSeriesColumnNames::MaxTime, make_nullable(timestamp_type));
+
+        if (!is_next_column_named(TimeSeriesColumnNames::MinTime))
+            make_new_column(TimeSeriesColumnNames::MinTime, make_nullable(timestamp_type));
+        if (!is_next_column_named(TimeSeriesColumnNames::MaxTime))
+            make_new_column(TimeSeriesColumnNames::MaxTime, make_nullable(timestamp_type));
    }

    /// Add missing columns for the "metrics" table.
--- a/tests/ci/changelog.py
+++ b/tests/ci/changelog.py
@ -115,7 +115,6 @@ def get_descriptions(prs: PullRequests) -> Dict[str, List[Description]]:
        # pylint: enable=protected-access
        if repo_name not in repos:
            repos[repo_name] = pr.base.repo
-        in_changelog = False
        merge_commit = pr.merge_commit_sha
        if merge_commit is None:
            logging.warning("PR %s does not have merge-commit, skipping", pr.number)
@ -291,7 +290,7 @@ def generate_description(item: PullRequest, repo: Repository) -> Optional[Descri
    # Normalize bug fixes
    if (
        re.match(
-            r"(?i)bug\Wfix",
+            r".*(?i)bug\Wfix",
            category,
        )
        # Map "Critical Bug Fix" to "Bug fix" category for changelog
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@ -167,6 +167,11 @@ class GH:
        latest_branch = Shell.get_output(
            'gh pr list --label release --repo ClickHouse/ClickHouse --search "sort:created" -L1 --json headRefName'
        )
+        if latest_branch:
+            latest_branch = json.loads(latest_branch)[0]["headRefName"]
+        print(
+            f"Latest branch [{latest_branch}], release branch [{branch}], release latest [{latest_branch == branch}]"
+        )
        return latest_branch == branch


--- a/tests/ci/create_release.py
+++ b/tests/ci/create_release.py
@ -61,6 +61,7 @@ class ReleaseContextManager:
            # create initial release info
            self.release_info = ReleaseInfo(
                release_branch="NA",
+                release_type="NA",
                commit_sha=args.ref,
                release_tag="NA",
                version="NA",
@ -93,6 +94,7 @@ class ReleaseContextManager:
@dataclasses.dataclass
 class ReleaseInfo:
    version: str
+    release_type: str
    release_tag: str
    release_branch: str
    commit_sha: str
@ -131,7 +133,7 @@ class ReleaseInfo:
        return self

    def prepare(
-        self, commit_ref: str, release_type: str, skip_tag_check: bool
+        self, commit_ref: str, release_type: str, _skip_tag_check: bool
    ) -> "ReleaseInfo":
        version = None
        release_branch = None
@ -143,17 +145,18 @@ class ReleaseInfo:
        assert release_type in ("patch", "new")
        if release_type == "new":
            # check commit_ref is right and on a right branch
-            Shell.check(
-                f"git merge-base --is-ancestor {commit_ref} origin/master",
-                strict=True,
-                verbose=True,
-            )
+            if commit_ref != "master":
+                Shell.check(
+                    f"git merge-base --is-ancestor {commit_ref} origin/master",
+                    strict=True,
+                    verbose=True,
+                )
            with checkout(commit_ref):
                commit_sha = Shell.get_output_or_raise(f"git rev-list -n1 {commit_ref}")
                # Git() must be inside "with checkout" contextmanager
                git = Git()
                version = get_version_from_repo(git=git)
-                release_branch = "master"
+                release_branch = f"{version.major}.{version.minor}"
                expected_prev_tag = f"v{version.major}.{version.minor}.1.1-new"
                version.bump().with_description(VersionType.NEW)
                assert (
@ -204,10 +207,11 @@ class ReleaseInfo:
                expected_tag_prefix
            ) and git.latest_tag.endswith(expected_tag_suffix):
                pass
-            elif not skip_tag_check:
-                assert (
-                    False
-                ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?"
+            # TODO: uncomment and check with dry-run
+            # elif not skip_tag_check:
+            #     assert (
+            #         False
+            #     ), f"BUG: Unexpected latest tag [{git.latest_tag}] expected [{expected_tag_prefix}*{expected_tag_suffix}]. Already Released?"

            previous_release_sha = Shell.get_output_or_raise(
                f"git rev-list -n1 {previous_release_tag}"
@ -238,6 +242,7 @@ class ReleaseInfo:
        self.release_progress = ReleaseProgress.STARTED
        self.progress_status = ReleaseProgressDescription.OK
        self.latest = latest_release
+        self.release_type = release_type
        return self

    def push_release_tag(self, dry_run: bool) -> None:
@ -262,16 +267,15 @@ class ReleaseInfo:
    @staticmethod
    def _create_gh_label(label: str, color_hex: str, dry_run: bool) -> None:
        cmd = f"gh api repos/{CI.Envs.GITHUB_REPOSITORY}/labels -f name={label} -f color={color_hex}"
-        Shell.check(cmd, dry_run=dry_run, strict=True)
+        res = Shell.check(cmd, dry_run=dry_run, verbose=True)
+        if not res:
+            # not a critical error - do not fail. branch might be created already (recovery case)
+            print("WARNING: failed to create backport labels for the new branch")

    def push_new_release_branch(self, dry_run: bool) -> None:
-        assert (
-            self.release_branch == "master"
-        ), "New release branch can be created only for release type [new]"
        git = Git()
        version = get_version_from_repo(git=git)
-        new_release_branch = f"{version.major}.{version.minor}"
-        stable_release_type = version.get_stable_release_type()
+        new_release_branch = self.release_branch
        version_after_release = copy(version)
        version_after_release.bump()
        assert (
@ -285,11 +289,8 @@ class ReleaseInfo:
        print(
            f"Create and push new release branch [{new_release_branch}], commit [{self.commit_sha}]"
        )
-        with checkout(self.release_branch):
+        with checkout("master"):
            with checkout_new(new_release_branch):
-                pr_labels = f"--label {CI.Labels.RELEASE}"
-                if stable_release_type == VersionType.LTS:
-                    pr_labels += f" --label {CI.Labels.RELEASE_LTS}"
                cmd_push_branch = (
                    f"{GIT_PREFIX} push --set-upstream origin {new_release_branch}"
                )
@ -302,67 +303,108 @@ class ReleaseInfo:
        ReleaseInfo._create_gh_label(
            f"v{new_release_branch}-affected", "c2bfff", dry_run=dry_run
        )
-        Shell.check(
-            f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {new_release_branch}'
-            --head {new_release_branch} {pr_labels}
-            --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'
-            """,
-            dry_run=dry_run,
-            strict=True,
-            verbose=True,
-        )

    def get_version_bump_branch(self):
        return f"bump_version_{self.version}"

    def update_version_and_contributors_list(self, dry_run: bool) -> None:
-        # Bump version, update contributors list, create PR
-        branch_upd_version_contributors = self.get_version_bump_branch()
+        # Bump version, update contributors list, create on release branch
        with checkout(self.commit_sha):
            git = Git()
            version = get_version_from_repo(git=git)
-            if self.release_branch == "master":
+            if self.release_type == "patch":
+                assert (
+                    version.string == self.version
+                ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]"
+                version.bump_patch()
+            else:
+                version.reset_tweak()
+            version.with_description(version.get_stable_release_type())
+
+        with checkout(self.release_branch):
+            update_cmake_version(version)
+            update_contributors(raise_error=True)
+            cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'"
+            cmd_push_branch = f"{GIT_PREFIX} push"
+            Shell.check(
+                cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True
+            )
+            Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True)
+            if dry_run:
+                Shell.check(
+                    f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
+                    verbose=True,
+                )
+                Shell.check(
+                    f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
+                    verbose=True,
+                )
+
+        # TODO: move to new GH step?
+        if self.release_type == "new":
+            print("Update version on master branch")
+            branch_upd_version_contributors = self.get_version_bump_branch()
+            with checkout(self.commit_sha):
+                git = Git()
+                version = get_version_from_repo(git=git)
                version.bump()
                version.with_description(VersionType.TESTING)
-            else:
-                version.with_description(version.get_stable_release_type())
-            assert (
-                version.string == self.version
-            ), f"BUG: version in release info does not match version in git commit, expected [{self.version}], got [{version.string}]"
-        with checkout(self.release_branch):
-            with checkout_new(branch_upd_version_contributors):
-                update_cmake_version(version)
-                update_contributors(raise_error=True)
-                cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'"
-                cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}"
-                actor = os.getenv("GITHUB_ACTOR", "") or "me"
-                body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n"
-                cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base {self.release_branch} --body \"{body}\" --assignee {actor}"
+            with checkout("master"):
+                with checkout_new(branch_upd_version_contributors):
+                    update_cmake_version(version)
+                    update_contributors(raise_error=True)
+                    cmd_commit_version_upd = f"{GIT_PREFIX} commit '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}' -m 'Update autogenerated version to {self.version} and contributors'"
+                    cmd_push_branch = f"{GIT_PREFIX} push --set-upstream origin {branch_upd_version_contributors}"
+                    actor = os.getenv("GITHUB_ACTOR", "") or "me"
+                    body = f"Automatic version bump after release {self.release_tag}\n### Changelog category (leave one):\n- Not for changelog (changelog entry is not required)\n"
+                    cmd_create_pr = f"gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Update version after release' --head {branch_upd_version_contributors} --base master --body \"{body}\" --assignee {actor}"
+                    Shell.check(
+                        cmd_commit_version_upd,
+                        strict=True,
+                        dry_run=dry_run,
+                        verbose=True,
+                    )
+                    Shell.check(
+                        cmd_push_branch, strict=True, dry_run=dry_run, verbose=True
+                    )
+                    Shell.check(
+                        cmd_create_pr, strict=True, dry_run=dry_run, verbose=True
+                    )
+                    if dry_run:
+                        Shell.check(
+                            f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
+                            verbose=True,
+                        )
+                        Shell.check(
+                            f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
+                            verbose=True,
+                        )
+                        self.version_bump_pr = "dry-run"
+                    else:
+                        self.version_bump_pr = GH.get_pr_url_by_branch(
+                            branch=branch_upd_version_contributors
+                        )
+
+            # TODO: move to new GH step?
+            print("Create Release PR")
+            with checkout(self.release_branch):
+                pr_labels = f"--label {CI.Labels.RELEASE}"
+                if version.get_stable_release_type() == VersionType.LTS:
+                    pr_labels += f" --label {CI.Labels.RELEASE_LTS}"
                Shell.check(
-                    cmd_commit_version_upd, strict=True, dry_run=dry_run, verbose=True
+                    f"""gh pr create --repo {CI.Envs.GITHUB_REPOSITORY} --title 'Release pull request for branch {self.release_branch}' \
+                                --head {self.release_branch} {pr_labels} \
+                                --body 'This PullRequest is a part of ClickHouse release cycle. It is used by CI system only. Do not perform any changes with it.'""",
+                    dry_run=dry_run,
+                    strict=True,
+                    verbose=True,
                )
-                Shell.check(cmd_push_branch, strict=True, dry_run=dry_run, verbose=True)
-                Shell.check(cmd_create_pr, strict=True, dry_run=dry_run, verbose=True)
-                if dry_run:
-                    Shell.check(
-                        f"{GIT_PREFIX} diff '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
-                        verbose=True,
-                    )
-                    Shell.check(
-                        f"{GIT_PREFIX} checkout '{CMAKE_PATH}' '{CONTRIBUTORS_PATH}'",
-                        verbose=True,
-                    )
-                    self.version_bump_pr = "dry-run"
-                else:
-                    self.version_bump_pr = GH.get_pr_url_by_branch(
-                        branch=branch_upd_version_contributors
-                    )

    def get_change_log_branch(self):
        return f"auto/{self.release_tag}"

    def update_release_info(self, dry_run: bool) -> "ReleaseInfo":
-        if self.release_branch != "master":
+        if self.release_type == "patch":
            if not self.changelog_pr:
                branch = self.get_change_log_branch()
                if not dry_run:
@ -371,21 +413,22 @@ class ReleaseInfo:
                    url = "dry-run"
                print(f"ChangeLog PR url [{url}]")
                self.changelog_pr = url
-
-            if not self.version_bump_pr:
-                branch = self.get_version_bump_branch()
-                if not dry_run:
-                    url = GH.get_pr_url_by_branch(branch=branch)
-                else:
-                    url = "dry-run"
-                print(f"Version bump PR url [{url}]")
-                self.version_bump_pr = url
-
-            self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
-            print(f"Release url [{self.release_url}]")
-
            self.docker = f"docker run --rm clickhouse/clickhouse:{self.version} clickhouse --version"
+        else:
+            # new release branch - find version bump pr on a master branch
+            branch = self.get_version_bump_branch()
+            if not dry_run:
+                url = GH.get_pr_url_by_branch(branch=branch)
+            else:
+                url = "dry-run"
+            print(f"Version bump PR url [{url}]")
+            self.version_bump_pr = url
+
+        self.release_url = f"https://github.com/{CI.Envs.GITHUB_REPOSITORY}/releases/tag/{self.release_tag}"
+        print(f"Release url [{self.release_url}]")
+
        self.dump()
+
        return self

    def create_gh_release(self, packages_files: List[str], dry_run: bool) -> None:
@ -410,35 +453,40 @@ class ReleaseInfo:

    def merge_prs(self, dry_run: bool) -> None:
        repo = CI.Envs.GITHUB_REPOSITORY
-        assert self.version_bump_pr
-        if dry_run:
-            version_bump_pr_num = 12345
-        else:
-            version_bump_pr_num = int(self.version_bump_pr.split("/")[-1])
-        print("Merging Version bump PR")
-        res_1 = Shell.check(
-            f"gh pr merge {version_bump_pr_num} --repo {repo} --merge --auto",
-            verbose=True,
-            dry_run=dry_run,
-        )
-
-        res_2 = True
-        if not self.release_tag.endswith("-new"):
+        if self.release_type == "patch":
            assert self.changelog_pr
            print("Merging ChangeLog PR")
            if dry_run:
                changelog_pr_num = 23456
            else:
                changelog_pr_num = int(self.changelog_pr.split("/")[-1])
-            res_2 = Shell.check(
+            res = Shell.check(
                f"gh pr merge {changelog_pr_num} --repo {repo} --merge --auto",
                verbose=True,
                dry_run=dry_run,
            )
        else:
-            assert not self.changelog_pr
+            if not dry_run:
+                assert not self.changelog_pr
+            res = True

-        self.prs_merged = res_1 and res_2
+        if self.release_type == "new":
+            assert self.version_bump_pr
+            print("Merging Version Bump PR")
+            if dry_run:
+                version_bump_pr = 23456
+            else:
+                version_bump_pr = int(self.version_bump_pr.split("/")[-1])
+            res = res and Shell.check(
+                f"gh pr merge {version_bump_pr} --repo {repo} --merge --auto",
+                verbose=True,
+                dry_run=dry_run,
+            )
+        else:
+            if not dry_run:
+                assert not self.changelog_pr
+
+        self.prs_merged = res


 class RepoTypes:
@ -759,7 +807,7 @@ if __name__ == "__main__":
            release_info.prepare(
                commit_ref=args.ref,
                release_type=args.release_type,
-                skip_tag_check=args.skip_tag_check,
+                _skip_tag_check=args.skip_tag_check,
            )

    if args.download_packages:
--- a/tests/ci/docker_server.py
+++ b/tests/ci/docker_server.py
@ -70,7 +70,7 @@ def parse_args() -> argparse.Namespace:
    parser.add_argument(
        "--tag-type",
        type=str,
-        choices=("head", "release", "latest-release"),
+        choices=("head", "release", "release-latest"),
        default="head",
        help="defines required tags for resulting docker image. "
        "head - for master image (tag: head) "
--- a/tests/ci/version_helper.py
+++ b/tests/ci/version_helper.py
@ -85,6 +85,16 @@ class ClickHouseVersion:
            self._tweak = 1
        return self

+    def bump_patch(self) -> "ClickHouseVersion":
+        self._revision += 1
+        self._patch += 1
+        self._tweak = 1
+        return self
+
+    def reset_tweak(self) -> "ClickHouseVersion":
+        self._tweak = 1
+        return self
+
    def major_update(self) -> "ClickHouseVersion":
        if self._git is not None:
            self._git.update()
@ -104,13 +114,6 @@ class ClickHouseVersion:
            self.major, self.minor, self.patch + 1, self.revision, self._git
        )

-    def reset_tweak(self) -> "ClickHouseVersion":
-        if self._git is not None:
-            self._git.update()
-        return ClickHouseVersion(
-            self.major, self.minor, self.patch, self.revision, self._git, 1
-        )
-
    @property
    def major(self) -> int:
        return self._major
--- a/tests/fuzz/README.md
+++ b/tests/fuzz/README.md
@ -1,23 +0,0 @@
-The list of functions generated via the following query
-
-```
-    clickhouse client -q "SELECT * FROM (SELECT DISTINCT concat('\"', name, '\"') as res FROM system.functions ORDER BY name UNION ALL SELECT concat('\"', a.name, b.name, '\"') as res FROM system.functions as a CROSS JOIN system.aggregate_function_combinators as b WHERE a.is_aggregate = 1) ORDER BY res" > functions.dict
-```
-
-The list of datatypes generated via the following query:
-
-```
-    clickhouse client -q "SELECT DISTINCT concat('\"', name, '\"') as res FROM system.data_type_families ORDER BY name" > datatypes.dict
-```
-
-The list of keywords generated via the following query:
-
-```
-    clickhouse client -q "SELECT DISTINCT concat('\"', keyword, '\"') as res FROM system.keywords ORDER BY keyword" > key_words.dict
-```
-
-Then merge all dictionaries into one (all.dict)
-
-```
-    cat ./dictionaries/* | sort | uniq > all.dict
-```
--- a/tests/fuzz/all.dict
+++ b/tests/fuzz/all.dict
--- a/tests/fuzz/dictionaries/datatypes.dict
+++ b/tests/fuzz/dictionaries/datatypes.dict
@ -31,6 +31,7 @@
 "Decimal256"
 "Decimal32"
 "Decimal64"
+"Dynamic"
 "ENUM"
 "Enum"
 "Enum16"
@ -74,6 +75,7 @@
 "JSON"
 "LONGBLOB"
 "LONGTEXT"
+"LineString"
 "LowCardinality"
 "MEDIUMBLOB"
 "MEDIUMINT"
@ -81,6 +83,7 @@
 "MEDIUMINT UNSIGNED"
 "MEDIUMTEXT"
 "Map"
+"MultiLineString"
 "MultiPolygon"
 "NATIONAL CHAR"
 "NATIONAL CHAR VARYING"
@ -132,4 +135,3 @@
 "YEAR"
 "bool"
 "boolean"
-"Dynamic"
--- a/tests/fuzz/dictionaries/functions.dict
+++ b/tests/fuzz/dictionaries/functions.dict
@ -126,6 +126,7 @@
 "JSONHas"
 "JSONKey"
 "JSONLength"
+"JSONMergePatch"
 "JSONType"
 "JSON_ARRAY_LENGTH"
 "JSON_EXISTS"
@ -227,6 +228,8 @@
 "UTC_timestamp"
 "UUIDNumToString"
 "UUIDStringToNum"
+"UUIDToNum"
+"UUIDv7ToDateTime"
 "VAR_POP"
 "VAR_POPArgMax"
 "VAR_POPArgMin"
@ -263,6 +266,7 @@
 "YYYYMMDDhhmmssToDateTime"
 "YYYYMMDDhhmmssToDateTime64"
 "_CAST"
+"__actionName"
 "__bitBoolMaskAnd"
 "__bitBoolMaskOr"
 "__bitSwapLastTwo"
@ -660,6 +664,8 @@
 "base58Encode"
 "base64Decode"
 "base64Encode"
+"base64URLDecode"
+"base64URLEncode"
 "basename"
 "bin"
 "bitAnd"
@ -744,8 +750,15 @@
 "cbrt"
 "ceil"
 "ceiling"
+"changeDay"
+"changeHour"
+"changeMinute"
+"changeMonth"
+"changeSecond"
+"changeYear"
 "char"
 "cityHash64"
+"clamp"
 "coalesce"
 "concat"
 "concatAssumeInjective"
@ -970,6 +983,7 @@
 "current_date"
 "current_schemas"
 "current_timestamp"
+"current_user"
 "cutFragment"
 "cutIPv6"
 "cutQueryString"
@ -988,7 +1002,9 @@
 "dateDiff"
 "dateName"
 "dateTime64ToSnowflake"
+"dateTime64ToSnowflakeID"
 "dateTimeToSnowflake"
+"dateTimeToSnowflakeID"
 "dateTrunc"
 "date_diff"
 "decodeHTMLComponent"
@ -1032,6 +1048,21 @@
 "deltaSumTimestampSimpleState"
 "deltaSumTimestampState"
 "demangle"
+"denseRank"
+"denseRankArgMax"
+"denseRankArgMin"
+"denseRankArray"
+"denseRankDistinct"
+"denseRankForEach"
+"denseRankIf"
+"denseRankMap"
+"denseRankMerge"
+"denseRankNull"
+"denseRankOrDefault"
+"denseRankOrNull"
+"denseRankResample"
+"denseRankSimpleState"
+"denseRankState"
 "dense_rank"
 "dense_rankArgMax"
 "dense_rankArgMin"
@ -1108,8 +1139,11 @@
 "domainWithoutWWWRFC"
 "dotProduct"
 "dumpColumnStructure"
+"dynamicElement"
+"dynamicType"
 "e"
 "editDistance"
+"editDistanceUTF8"
 "empty"
 "emptyArrayDate"
 "emptyArrayDateTime"
@ -1334,14 +1368,17 @@
 "gccMurmurHash"
 "gcd"
 "generateRandomStructure"
+"generateSnowflakeID"
 "generateULID"
 "generateUUIDv4"
+"generateUUIDv7"
 "geoDistance"
 "geoToH3"
 "geoToS2"
 "geohashDecode"
 "geohashEncode"
 "geohashesInBox"
+"getClientHTTPHeader"
 "getMacro"
 "getOSKernelVersion"
 "getServerPort"
@ -1589,6 +1626,20 @@
 "groupBitmapXorSimpleState"
 "groupBitmapXorState"
 "groupConcat"
+"groupConcatArgMax"
+"groupConcatArgMin"
+"groupConcatArray"
+"groupConcatDistinct"
+"groupConcatForEach"
+"groupConcatIf"
+"groupConcatMap"
+"groupConcatMerge"
+"groupConcatNull"
+"groupConcatOrDefault"
+"groupConcatOrNull"
+"groupConcatResample"
+"groupConcatSimpleState"
+"groupConcatState"
 "groupUniqArray"
 "groupUniqArrayArgMax"
 "groupUniqArrayArgMin"
@ -1604,6 +1655,21 @@
 "groupUniqArrayResample"
 "groupUniqArraySimpleState"
 "groupUniqArrayState"
+"group_concat"
+"group_concatArgMax"
+"group_concatArgMin"
+"group_concatArray"
+"group_concatDistinct"
+"group_concatForEach"
+"group_concatIf"
+"group_concatMap"
+"group_concatMerge"
+"group_concatNull"
+"group_concatOrDefault"
+"group_concatOrNull"
+"group_concatResample"
+"group_concatSimpleState"
+"group_concatState"
 "h3CellAreaM2"
 "h3CellAreaRads2"
 "h3Distance"
@ -1660,6 +1726,8 @@
 "hasTokenCaseInsensitiveOrNull"
 "hasTokenOrNull"
 "hex"
+"hilbertDecode"
+"hilbertEncode"
 "histogram"
 "histogramArgMax"
 "histogramArgMin"
@ -1881,6 +1949,7 @@
 "less"
 "lessOrEquals"
 "levenshteinDistance"
+"levenshteinDistanceUTF8"
 "lgamma"
 "like"
 "ln"
@ -2498,10 +2567,44 @@
 "parseDateTimeInJodaSyntaxOrZero"
 "parseDateTimeOrNull"
 "parseDateTimeOrZero"
+"parseReadableSize"
+"parseReadableSizeOrNull"
+"parseReadableSizeOrZero"
 "parseTimeDelta"
+"partitionID"
 "partitionId"
 "path"
 "pathFull"
+"percentRank"
+"percentRankArgMax"
+"percentRankArgMin"
+"percentRankArray"
+"percentRankDistinct"
+"percentRankForEach"
+"percentRankIf"
+"percentRankMap"
+"percentRankMerge"
+"percentRankNull"
+"percentRankOrDefault"
+"percentRankOrNull"
+"percentRankResample"
+"percentRankSimpleState"
+"percentRankState"
+"percent_rank"
+"percent_rankArgMax"
+"percent_rankArgMin"
+"percent_rankArray"
+"percent_rankDistinct"
+"percent_rankForEach"
+"percent_rankIf"
+"percent_rankMap"
+"percent_rankMerge"
+"percent_rankNull"
+"percent_rankOrDefault"
+"percent_rankOrNull"
+"percent_rankResample"
+"percent_rankSimpleState"
+"percent_rankState"
 "pi"
 "plus"
 "pmod"
@ -2533,6 +2636,7 @@
 "positive_modulo"
 "pow"
 "power"
+"printf"
 "proportionsZTest"
 "protocol"
 "punycodeDecode"
@ -3103,6 +3207,8 @@
 "rankResample"
 "rankSimpleState"
 "rankState"
+"readWKTLineString"
+"readWKTMultiLineString"
 "readWKTMultiPolygon"
 "readWKTPoint"
 "readWKTPolygon"
@ -3340,6 +3446,8 @@
 "skewSampState"
 "sleep"
 "sleepEachRow"
+"snowflakeIDToDateTime"
+"snowflakeIDToDateTime64"
 "snowflakeToDateTime"
 "snowflakeToDateTime64"
 "soundex"
@ -3902,6 +4010,7 @@
 "truncate"
 "tryBase58Decode"
 "tryBase64Decode"
+"tryBase64URLDecode"
 "tryDecrypt"
 "tryIdnaEncode"
 "tryPunycodeDecode"
@ -3923,6 +4032,7 @@
 "tupleModuloByNumber"
 "tupleMultiply"
 "tupleMultiplyByNumber"
+"tupleNames"
 "tupleNegate"
 "tuplePlus"
 "tupleToNameValuePairs"
--- a/tests/fuzz/dictionaries/key_words.dict
+++ b/tests/fuzz/dictionaries/key_words.dict
@ -3,7 +3,7 @@
 "ADD CONSTRAINT"
 "ADD INDEX"
 "ADD PROJECTION"
-"ADD STATISTIC"
+"ADD STATISTICS"
 "ADMIN OPTION FOR"
 "AFTER"
 "ALGORITHM"
@ -76,7 +76,7 @@
 "CLEAR COLUMN"
 "CLEAR INDEX"
 "CLEAR PROJECTION"
-"CLEAR STATISTIC"
+"CLEAR STATISTICS"
 "CLUSTER"
 "CLUSTERS"
 "CN"
@ -110,6 +110,8 @@
 "CURRENTUSER"
 "CURRENT_USER"
 "D"
+"DATA"
+"DATA INNER UUID"
 "DATABASE"
 "DATABASES"
 "DATE"
@ -147,7 +149,7 @@
 "DROP PART"
 "DROP PARTITION"
 "DROP PROJECTION"
-"DROP STATISTIC"
+"DROP STATISTICS"
 "DROP TABLE"
 "DROP TEMPORARY TABLE"
 "ELSE"
@ -247,6 +249,7 @@
 "IS NULL"
 "IS_OBJECT_ID"
 "JOIN"
+"JWT"
 "KERBEROS"
 "KEY"
 "KEY BY"
@ -277,13 +280,15 @@
 "MATERIALIZE COLUMN"
 "MATERIALIZE INDEX"
 "MATERIALIZE PROJECTION"
-"MATERIALIZE STATISTIC"
+"MATERIALIZE STATISTICS"
 "MATERIALIZE TTL"
 "MATERIALIZED"
 "MAX"
 "MCS"
 "MEMORY"
 "MERGES"
+"METRICS"
+"METRICS INNER UUID"
 "MI"
 "MICROSECOND"
 "MICROSECONDS"
@ -297,12 +302,14 @@
 "MODIFY"
 "MODIFY COLUMN"
 "MODIFY COMMENT"
+"MODIFY DEFINER"
 "MODIFY ORDER BY"
 "MODIFY QUERY"
 "MODIFY REFRESH"
 "MODIFY SAMPLE BY"
 "MODIFY SETTING"
 "MODIFY SQL SECURITY"
+"MODIFY STATISTICS"
 "MODIFY TTL"
 "MONTH"
 "MONTHS"
@ -373,6 +380,7 @@
 "Protobuf"
 "Q"
 "QQ"
+"QUALIFY"
 "QUARTER"
 "QUARTERS"
 "QUERY"
@ -384,6 +392,7 @@
 "READONLY"
 "REALM"
 "RECOMPRESS"
+"RECURSIVE"
 "REFERENCES"
 "REFRESH"
 "REGEXP"
@ -415,6 +424,7 @@
 "SALT"
 "SAMPLE"
 "SAMPLE BY"
+"SAN"
 "SCHEME"
 "SECOND"
 "SECONDS"
@ -460,7 +470,8 @@
 "SS"
 "SSH_KEY"
 "SSL_CERTIFICATE"
-"STATISTIC"
+"START TRANSACTION"
+"STATISTICS"
 "STEP"
 "STORAGE"
 "STRICT"
@ -475,6 +486,8 @@
 "TABLE"
 "TABLE OVERRIDE"
 "TABLES"
+"TAGS"
+"TAGS INNER UUID"
 "TEMPORARY"
 "TEMPORARY TABLE"
 "TEST"
@ -529,6 +542,7 @@
 "WITH NAME"
 "WITH REPLACE OPTION"
 "WITH TIES"
+"WITH_ITEMINDEX"
 "WK"
 "WRITABLE"
 "WW"
@ -540,4 +554,3 @@
 "bagexpansion"
 "base_backup"
 "cluster_host_ids"
-"with_itemindex"
--- a/Show More
+++ b/Show More