Merge branch 'function_factorial' of https://github.com/bigo-sg/ClickHouse into function_factorial

taiyang-li 2022-11-15 09:46:44 +08:00
commit 7178d80c51
302 changed files with 11454 additions and 1823 deletions


@ -16,7 +16,9 @@ endmacro()
if (SANITIZE)
if (SANITIZE STREQUAL "address")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
# LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope',
# see https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")


@ -80,6 +80,16 @@ RUN arch=${TARGETARCH:-amd64} \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# Remove as much of Ubuntu as possible.
# ClickHouse does not need Ubuntu. It can run on top of Linux kernel without any OS distribution.
# ClickHouse does not need Docker at all. ClickHouse is above all that.
# It does not care about Ubuntu, Docker, or other cruft, and neither should you.
# The fact that this Docker image is based on Ubuntu is just a misconception.
# Some vulnerability scanners complain about Ubuntu, which is not relevant to ClickHouse at all.
# ClickHouse does not care when you report false vulnerabilities by running some Docker scanners.
RUN apt-get remove --purge -y libksba8 && apt-get autoremove -y
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)

File diff suppressed because it is too large.

Binary file not shown (added image, 277 KiB).

Binary file not shown (added image, 315 KiB).

Binary file not shown (added image, 246 KiB).

Binary file not shown (added image, 69 KiB).


@ -16,44 +16,54 @@ Queries in ClickHouse can be divided into several types:
The following settings regulate user permissions by the type of query:
- [readonly](#settings_readonly) — Restricts permissions for all types of queries except DDL queries.
- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
## readonly
Restricts permissions for read data, write data, and change settings queries.
`KILL QUERY` can be performed with any settings.
When set to 1, allows:
## readonly {#settings_readonly}
- All types of read queries (like SELECT and equivalent queries).
- Queries that modify only session context (like USE).
Restricts permissions for reading data, writing data, and changing settings queries.
When set to 2, allows the above plus:
- SET and CREATE TEMPORARY TABLE
See how the queries are divided into types [above](#permissions_for_queries).
:::tip
Queries like EXISTS, DESCRIBE, EXPLAIN, SHOW PROCESSLIST, etc. are equivalent to SELECT, because they just select from system tables.
:::
Possible values:
- 0 — All queries are allowed.
- 1 — Only read data queries are allowed.
- 2 — Read data and change settings queries are allowed.
- 0 — Read, Write, and Change settings queries are allowed.
- 1 — Only Read data queries are allowed.
- 2 — Read data and Change settings queries are allowed.
Default value: 0
:::note
After setting `readonly = 1`, the user can't change `readonly` and `allow_ddl` settings in the current session.
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user from changing only specific settings. Also there is a way to allow changing only specific settings under `readonly = 1` restrictions. For details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
Setting `readonly = 1` prohibits the user from changing settings. There is a way to prohibit the user from changing only specific settings. Also there is a way to allow changing only specific settings under `readonly = 1` restrictions. For details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
:::
Default value: 0
## allow_ddl {#settings_allow_ddl}
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
See how the queries are divided into types [above](#permissions_for_queries).
Possible values:
- 0 — DDL queries are not allowed.
- 1 — DDL queries are allowed.
You can't execute `SET allow_ddl = 1` if `allow_ddl = 0` for the current session.
Default value: 1
[Original article](https://clickhouse.com/docs/en/operations/settings/permissions_for_queries/) <!--hide-->
:::note
You cannot run `SET allow_ddl = 1` if `allow_ddl = 0` for the current session.
:::
:::note KILL QUERY
`KILL QUERY` can be performed with any combination of readonly and allow_ddl settings.
:::
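A rough sketch of how these settings interact within a single session (`some-query-id` is a placeholder):

``` sql
SET readonly = 2;                   -- allowed: changing settings is permitted at level 2
SELECT count() FROM system.tables;  -- allowed: read query
SET readonly = 1;                   -- allowed while still at level 2; from here on, reads only
SET max_threads = 8;                -- fails: readonly = 1 forbids changing settings
KILL QUERY WHERE query_id = 'some-query-id'; -- allowed with any readonly/allow_ddl combination
```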


@ -3399,6 +3399,17 @@ Use schema from cache for URL with last modification time validation (for urls w
Default value: `true`.
## use_structure_from_insertion_table_in_table_functions {#use_structure_from_insertion_table_in_table_functions}
Use structure from insertion table instead of schema inference from data.
Possible values:
- 0 - disabled
- 1 - enabled
- 2 - auto
Default value: 2.
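For illustration, a sketch with a hypothetical table and file name:

``` sql
CREATE TABLE events (ts DateTime, message String) ENGINE = MergeTree ORDER BY ts;

-- With the setting enabled, the structure of `events` is used for the table
-- function instead of inferring the schema from the file's data:
INSERT INTO events
SELECT * FROM file('events.tsv', 'TSV')
SETTINGS use_structure_from_insertion_table_in_table_functions = 1;
```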
## compatibility {#compatibility}
This setting changes other settings according to provided ClickHouse version.


@ -65,6 +65,11 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
Differs from [modulo](#modulo) in that it returns zero when the divisor is zero.
## positive_modulo(a, b)
Calculates the remainder when dividing `a` by `b`. Similar to the function `modulo`, except that `positive_modulo` always returns a non-negative number.
Note that `positive_modulo` is 4-5 times slower than `modulo`. Do not use `positive_modulo` unless you need a non-negative result and do not care too much about performance.
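For example:

``` sql
SELECT modulo(-3, 10), positive_modulo(-3, 10);
-- Returns -3 and 7: positive_modulo maps the remainder into [0, b).
```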
## negate(a), -a operator
Calculates a number with the reverse sign. The result is always signed.

View File

@ -24,6 +24,11 @@ Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type
Uses a linear congruential generator.
## canonicalRand
Generates pseudo-random results that are independent and identically distributed, uniformly in [0, 1).
Non-deterministic. The return type is Float64.
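For example:

``` sql
SELECT canonicalRand() FROM numbers(3);
-- Returns three independent Float64 values, each uniformly distributed in [0, 1).
```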
## randConstant
Produces a constant column with a random value.


@ -6,21 +6,22 @@ sidebar_label: Splitting and Merging Strings and Arrays
# Functions for Splitting and Merging Strings and Arrays
## splitByChar(separator, s)
## splitByChar(separator, s[, max_substrings])
Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consisting of exactly one character.
Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consists of exactly one character.
Returns an array of selected substrings. Empty substrings may be selected if the separator occurs at the beginning or end of the string, or if there are multiple consecutive separators.
**Syntax**
``` sql
splitByChar(separator, s)
splitByChar(separator, s[, max_substrings])
```
**Arguments**
- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible (see the example below).
**Returned value(s)**
@ -44,20 +45,22 @@ SELECT splitByChar(',', '1,2,3,abcde');
└─────────────────────────────────┘
```
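A sketch of the `max_substrings` behaviour described above (assuming substrings beyond the limit are simply dropped):

``` sql
SELECT splitByChar(',', '1,2,3,abcde', 2);
-- ['1','2']: no more than max_substrings substrings are returned.
```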
## splitByString(separator, s)
## splitByString(separator, s[, max_substrings])
Splits a string into substrings separated by a string. It uses a constant string `separator` of multiple characters as the separator. If the string `separator` is empty, it will split the string `s` into an array of single characters.
**Syntax**
``` sql
splitByString(separator, s)
splitByString(separator, s[, max_substrings])
```
**Arguments**
- `separator` — The separator. [String](../../sql-reference/data-types/string.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible.
**Returned value(s)**
@ -91,20 +94,22 @@ SELECT splitByString('', 'abcde');
└────────────────────────────┘
```
## splitByRegexp(regexp, s)
## splitByRegexp(regexp, s[, max_substrings])
Splits a string into substrings separated by a regular expression. It uses a regular expression string `regexp` as the separator. If the `regexp` is empty, it will split the string `s` into an array of single characters. If no match is found for this regular expression, the string `s` won't be split.
**Syntax**
``` sql
splitByRegexp(regexp, s)
splitByRegexp(regexp, s[, max_substrings])
```
**Arguments**
- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible.
**Returned value(s)**
@ -146,7 +151,7 @@ Result:
└────────────────────────────┘
```
## splitByWhitespace(s)
## splitByWhitespace(s[, max_substrings])
Splits a string into substrings separated by whitespace characters.
Returns an array of selected substrings.
@ -154,12 +159,14 @@ Returns an array of selected substrings.
**Syntax**
``` sql
splitByWhitespace(s)
splitByWhitespace(s[, max_substrings])
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible.
**Returned value(s)**
@ -179,7 +186,7 @@ SELECT splitByWhitespace(' 1! a, b. ');
└─────────────────────────────────────┘
```
## splitByNonAlpha(s)
## splitByNonAlpha(s[, max_substrings])
Splits a string into substrings separated by whitespace and punctuation characters.
Returns an array of selected substrings.
@ -187,12 +194,14 @@ Returns an array of selected substrings.
**Syntax**
``` sql
splitByNonAlpha(s)
splitByNonAlpha(s[, max_substrings])
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible.
**Returned value(s)**
@ -217,10 +226,28 @@ SELECT splitByNonAlpha(' 1! a, b. ');
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
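For example:

``` sql
SELECT arrayStringConcat(['12', '05', '2021'], '/'); -- '12/05/2021'
SELECT arrayStringConcat(['a', 'b', 'c']);           -- 'abc' (the separator defaults to '')
```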
## alphaTokens(s)
## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
**Syntax**
``` sql
alphaTokens(s[, max_substrings])
splitByAlpha(s[, max_substrings])
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, at most `max_substrings` substrings are returned; otherwise the function returns as many substrings as possible.
**Returned value(s)**
Returns an array of selected substrings.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Example**
``` sql


@ -41,7 +41,7 @@ Purge default roles from a user:
SET DEFAULT ROLE NONE TO user
```
Set all the granted roles as default excepting some of them:
Set all the granted roles as default except for specific roles `role1` and `role2`:
``` sql
SET DEFAULT ROLE ALL EXCEPT role1, role2 TO user


@ -67,7 +67,7 @@ ClickHouse provides a variety of ways to trade data precision for faster query execution:
2. Run approximate queries over a sample of the data. In this case, only a small fraction of the data is retrieved from disk.
3. Aggregate using a limited number of randomly selected aggregation keys instead of all of them. When the keys satisfy certain distribution conditions, this provides reasonably accurate results while using fewer computing resources.
## Adaptive Join Algorithm {#adaptive-join-algorithm}
## Adaptive Join Algorithm {#adaptive-join-algorithm}
ClickHouse supports [JOIN](../sql-reference/statements/select/join.md) across multiple tables; it prefers the hash join algorithm, and switches to the merge join algorithm when there are several large tables.


@ -243,6 +243,7 @@ try
registerAggregateFunctions();
processConfig();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
/// Includes delayed_interactive.
if (is_interactive)
@ -1088,8 +1089,6 @@ void Client::processConfig()
}
else
{
std::string progress = config().getString("progress", "tty");
need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().getBool("echo", false);
ignore_error = config().getBool("ignore-error", false);


@ -351,7 +351,7 @@ struct LineChange
++pos;
}
indent = std::max(255U, num_spaces);
indent = std::min(255U, num_spaces);
line.assign(pos, end);
if (pos == end)


@ -37,6 +37,7 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <Storages/registerStorages.h>
#include <Storages/NamedCollections.h>
#include <Dictionaries/registerDictionaries.h>
#include <Disks/registerDisks.h>
#include <Formats/registerFormats.h>
@ -118,6 +119,8 @@ void LocalServer::initialize(Poco::Util::Application & self)
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
NamedCollectionFactory::instance().initialize(config());
}
@ -414,6 +417,8 @@ try
registerFormats();
processConfig();
initTtyBuffer(toProgressOption(config().getString("progress", "default")));
applyCmdSettings(global_context);
if (is_interactive)
@ -489,8 +494,6 @@ void LocalServer::processConfig()
}
else
{
std::string progress = config().getString("progress", "tty");
need_render_progress = (Poco::icompare(progress, "off") && Poco::icompare(progress, "no") && Poco::icompare(progress, "false") && Poco::icompare(progress, "0"));
echo_queries = config().hasOption("echo") || config().hasOption("verbose");
ignore_error = config().getBool("ignore-error", false);
is_multiquery = true;


@ -123,7 +123,7 @@ void MetricsTransmitter::transmit(std::vector<ProfileEvents::Count> & prev_count
{
for (const auto & name_value : async_metrics_values)
{
key_vals.emplace_back(asynchronous_metrics_path_prefix + name_value.first, name_value.second);
key_vals.emplace_back(asynchronous_metrics_path_prefix + name_value.first, name_value.second.value);
}
}


@ -60,6 +60,7 @@
#include <Storages/System/attachInformationSchemaTables.h>
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <Storages/NamedCollections.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
#include <Functions/registerFunctions.h>
@ -732,6 +733,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
NamedCollectionFactory::instance().initialize(config());
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
{
@ -805,41 +808,43 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// that are interpreted (not executed) but can alter the behaviour of the program as well.
/// Please keep the below log messages in-sync with the ones in daemon/BaseDaemon.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (stored_binary_hash.empty())
{
LOG_WARNING(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" (calculated checksum: {})", calculated_binary_hash);
}
else if (calculated_binary_hash == stored_binary_hash)
{
LOG_INFO(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
LOG_WARNING(log, "Integrity check of the executable skipped because the reference checksum could not be read.");
}
else
{
/// If program is run under debugger, ptrace will fail.
if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (calculated_binary_hash == stored_binary_hash)
{
/// Program is run under debugger. Modification of its binary image is ok for breakpoints.
global_context->addWarningMessage(
fmt::format("Server is run under debugger and its binary image is modified (most likely with breakpoints).",
calculated_binary_hash)
);
LOG_INFO(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
}
else
{
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable {2} was changed just after startup;"
" - the executable {2} was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - a logical error in the code."
, calculated_binary_hash, stored_binary_hash, executable_path);
/// If program is run under debugger, ptrace will fail.
if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
{
/// Program is run under debugger. Modification of its binary image is ok for breakpoints.
global_context->addWarningMessage(fmt::format(
"Server is run under debugger and its binary image is modified (most likely with breakpoints).",
calculated_binary_hash));
}
else
{
throw Exception(
ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable {2} was changed just after startup;"
" - the executable {2} was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - a logical error in the code.",
calculated_binary_hash,
stored_binary_hash,
executable_path);
}
}
}
}
@ -1279,6 +1284,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
NamedCollectionFactory::instance().reload(*config);
ProfileEvents::increment(ProfileEvents::MainConfigLoads);
/// Must be the last.
@ -1486,11 +1492,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
#endif
SCOPE_EXIT({
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
main_config_reloader.reset();
access_control.stopPeriodicReloading();
async_metrics.stop();
/** Ask to cancel background jobs all table engines,
@ -1789,10 +1790,17 @@ int Server::main(const std::vector<std::string> & /*args*/)
SCOPE_EXIT_SAFE({
LOG_DEBUG(log, "Received termination signal.");
LOG_DEBUG(log, "Waiting for current connections to close.");
/// Stop reloading of the main config. This must be done before everything else because it
/// can try to access/modify already deleted objects.
/// E.g. it can recreate new servers or it may pass a changed config to some destroyed parts of ContextSharedPart.
main_config_reloader.reset();
access_control.stopPeriodicReloading();
is_cancelled = true;
LOG_DEBUG(log, "Waiting for current connections to close.");
size_t current_connections = 0;
{
std::lock_guard lock(servers_lock);


@ -130,6 +130,7 @@ enum class AccessType
M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \
M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \
M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \
M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GLOBAL, SHOW_ACCESS) \
M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \
M(ACCESS_MANAGEMENT, "", GROUP, ALL) \
\


@ -379,12 +379,12 @@ std::shared_ptr<const EnabledRowPolicies> ContextAccess::getEnabledRowPolicies()
return no_row_policies;
}
ASTPtr ContextAccess::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, const ASTPtr & combine_with_expr) const
RowPolicyFilterPtr ContextAccess::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter) const
{
std::lock_guard lock{mutex};
if (enabled_row_policies)
return enabled_row_policies->getFilter(database, table_name, filter_type, combine_with_expr);
return nullptr;
return enabled_row_policies->getFilter(database, table_name, filter_type, combine_with_filter);
return combine_with_filter;
}
std::shared_ptr<const EnabledQuota> ContextAccess::getQuota() const
@ -465,6 +465,17 @@ std::shared_ptr<const AccessRights> ContextAccess::getAccessRightsWithImplicit()
template <bool throw_if_denied, bool grant_option, typename... Args>
bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const
{
if (user_was_dropped)
{
/// If the current user has been dropped we always throw an exception (even if `throw_if_denied` is false)
/// because dropping of the current user is considered as a situation which is exceptional enough to stop
/// query execution.
throw Exception(getUserName() + ": User has been dropped", ErrorCodes::UNKNOWN_USER);
}
if (is_full_access)
return true;
auto access_granted = [&]
{
if (trace_log)
@ -483,12 +494,6 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg
return false;
};
if (is_full_access)
return true;
if (user_was_dropped)
return access_denied("User has been dropped", ErrorCodes::UNKNOWN_USER);
if (flags & AccessType::CLUSTER && !access_control->doesOnClusterQueriesRequireClusterGrant())
flags &= ~AccessType::CLUSTER;


@ -1,7 +1,7 @@
#pragma once
#include <Access/AccessRights.h>
#include <Access/Common/RowPolicyDefs.h>
#include <Access/EnabledRowPolicies.h>
#include <Interpreters/ClientInfo.h>
#include <Core/UUID.h>
#include <base/scope_guard.h>
@ -87,7 +87,7 @@ public:
/// Returns the row policy filter for a specified table.
/// The function returns nullptr if there is no filter to apply.
ASTPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, const ASTPtr & combine_with_expr = nullptr) const;
RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter = {}) const;
/// Returns the quota to track resource consumption.
std::shared_ptr<const EnabledQuota> getQuota() const;


@ -6,12 +6,18 @@
namespace DB
{
bool RowPolicyFilter::empty() const
{
bool value;
return !expression || (tryGetLiteralBool(expression.get(), value) && value);
}
size_t EnabledRowPolicies::Hash::operator()(const MixedFiltersKey & key) const
{
return std::hash<std::string_view>{}(key.database) - std::hash<std::string_view>{}(key.table_name) + static_cast<size_t>(key.filter_type);
}
EnabledRowPolicies::EnabledRowPolicies() : params()
{
}
@ -23,7 +29,7 @@ EnabledRowPolicies::EnabledRowPolicies(const Params & params_) : params(params_)
EnabledRowPolicies::~EnabledRowPolicies() = default;
ASTPtr EnabledRowPolicies::getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
RowPolicyFilterPtr EnabledRowPolicies::getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
{
/// We don't lock `mutex` here.
auto loaded = mixed_filters.load();
@ -31,26 +37,36 @@ ASTPtr EnabledRowPolicies::getFilter(const String & database, const String & tab
if (it == loaded->end())
return {};
auto filter = it->second.ast;
bool value;
if (tryGetLiteralBool(filter.get(), value) && value)
return nullptr; /// The condition is always true, no need to check it.
return filter;
return it->second;
}
ASTPtr EnabledRowPolicies::getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, const ASTPtr & combine_with_expr) const
RowPolicyFilterPtr EnabledRowPolicies::getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter) const
{
ASTPtr filter = getFilter(database, table_name, filter_type);
if (filter && combine_with_expr)
filter = makeASTForLogicalAnd({filter, combine_with_expr});
else if (!filter)
filter = combine_with_expr;
RowPolicyFilterPtr filter = getFilter(database, table_name, filter_type);
if (filter && combine_with_filter)
{
auto new_filter = std::make_shared<RowPolicyFilter>(*filter);
bool value;
if (tryGetLiteralBool(filter.get(), value) && value)
return nullptr; /// The condition is always true, no need to check it.
if (filter->empty())
{
new_filter->expression = combine_with_filter->expression;
}
else if (combine_with_filter->empty())
{
new_filter->expression = filter->expression;
}
else
{
new_filter->expression = makeASTForLogicalAnd({filter->expression, combine_with_filter->expression});
}
std::copy(combine_with_filter->policies.begin(), combine_with_filter->policies.end(), std::back_inserter(new_filter->policies));
filter = new_filter;
}
else if (!filter)
{
filter = combine_with_filter;
}
return filter;
}


@ -1,12 +1,16 @@
#pragma once
#include <Access/Common/RowPolicyDefs.h>
#include <Access/RowPolicy.h>
#include <base/types.h>
#include <Core/UUID.h>
#include <boost/container/flat_set.hpp>
#include <boost/smart_ptr/atomic_shared_ptr.hpp>
#include <unordered_map>
#include <memory>
#include <unordered_map>
#include <vector>
namespace DB
@ -14,6 +18,19 @@ namespace DB
class IAST;
using ASTPtr = std::shared_ptr<IAST>;
struct RowPolicyFilter;
using RowPolicyFilterPtr = std::shared_ptr<const RowPolicyFilter>;
struct RowPolicyFilter
{
ASTPtr expression;
std::shared_ptr<const std::pair<String, String>> database_and_table_name;
std::vector<RowPolicyPtr> policies;
bool empty() const;
};
/// Provides fast access to row policies' conditions for a specific user and tables.
class EnabledRowPolicies
@ -39,8 +56,8 @@ public:
/// Returns prepared filter for a specific table and operations.
/// The function can return nullptr, that means there is no filters applied.
/// The returned filter can be a combination of the filters defined by multiple row policies.
ASTPtr getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
ASTPtr getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, const ASTPtr & combine_with_expr) const;
RowPolicyFilterPtr getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
RowPolicyFilterPtr getFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type, RowPolicyFilterPtr combine_with_filter) const;
private:
friend class RowPolicyCache;
@ -57,18 +74,12 @@ private:
friend bool operator!=(const MixedFiltersKey & left, const MixedFiltersKey & right) { return left.toTuple() != right.toTuple(); }
};
struct MixedFiltersResult
{
ASTPtr ast;
std::shared_ptr<const std::pair<String, String>> database_and_table_name;
};
struct Hash
{
size_t operator()(const MixedFiltersKey & key) const;
};
using MixedFiltersMap = std::unordered_map<MixedFiltersKey, MixedFiltersResult, Hash>;
using MixedFiltersMap = std::unordered_map<MixedFiltersKey, RowPolicyFilterPtr, Hash>;
const Params params;
mutable boost::atomic_shared_ptr<const MixedFiltersMap> mixed_filters;


@ -212,6 +212,7 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
{
FiltersMixer mixer;
std::shared_ptr<const std::pair<String, String>> database_and_table_name;
std::vector<RowPolicyPtr> policies;
};
std::unordered_map<MixedFiltersKey, MixerWithNames, Hash> mixers;
@ -232,7 +233,10 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
auto & mixer = mixers[key];
mixer.database_and_table_name = info.database_and_table_name;
if (match)
{
mixer.mixer.add(info.parsed_filters[filter_type_i], policy.isRestrictive());
mixer.policies.push_back(info.policy);
}
}
}
}
@ -240,9 +244,11 @@ void RowPolicyCache::mixFiltersFor(EnabledRowPolicies & enabled)
auto mixed_filters = boost::make_shared<MixedFiltersMap>();
for (auto & [key, mixer] : mixers)
{
auto & mixed_filter = (*mixed_filters)[key];
mixed_filter.database_and_table_name = mixer.database_and_table_name;
mixed_filter.ast = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
auto mixed_filter = std::make_shared<RowPolicyFilter>();
mixed_filter->database_and_table_name = std::move(mixer.database_and_table_name);
mixed_filter->expression = std::move(mixer.mixer).getResult(access_control.isEnabledUsersWithoutRowPoliciesCanReadRows());
mixed_filter->policies = std::move(mixer.policies);
mixed_filters->emplace(key, std::move(mixed_filter));
}
enabled.mixed_filters.store(mixed_filters);


@ -147,7 +147,7 @@ bool SettingsConstraints::checkImpl(const Settings & current_settings, SettingCh
{
if (const auto hints = current_settings.getHints(change.name); !hints.empty())
{
e.addMessage(fmt::format("Maybe you meant {}", toString(hints)));
e.addMessage(fmt::format("Maybe you meant {}", toString(hints)));
}
}
throw;


@ -152,6 +152,11 @@ public:
return popFirst(1);
}
void pop_front() /// NOLINT
{
return popFirst();
}
void popLast(size_t parts_to_remove_size)
{
assert(parts_to_remove_size <= parts.size());
@ -177,23 +182,21 @@ public:
void push_back(std::string && part) /// NOLINT
{
parts.push_back(std::move(part));
full_name += '.';
full_name += parts.back();
emplace_back(std::move(part));
}
void push_back(const std::string & part) /// NOLINT
{
parts.push_back(part);
full_name += '.';
full_name += parts.back();
emplace_back(part);
}
template <typename ...Args>
void emplace_back(Args&&... args) /// NOLINT
{
parts.emplace_back(std::forward<Args>(args)...);
full_name += '.';
bool was_not_empty = parts.size() != 1;
if (was_not_empty)
full_name += '.';
full_name += parts.back();
}
private:
@ -365,6 +368,26 @@ inline std::ostream & operator<<(std::ostream & stream, const IdentifierView & i
}
template <>
struct std::hash<DB::Identifier>
{
size_t operator()(const DB::Identifier & identifier) const
{
std::hash<std::string> hash;
return hash(identifier.getFullName());
}
};
template <>
struct std::hash<DB::IdentifierView>
{
size_t operator()(const DB::IdentifierView & identifier) const
{
std::hash<std::string_view> hash;
return hash(identifier.getFullName());
}
};
/// See https://fmt.dev/latest/api.html#formatting-user-defined-types
template <>


@ -0,0 +1,231 @@
#include <Analyzer/Passes/FuseFunctionsPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Functions/FunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ConstantNode.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
namespace
{
class FuseFunctionsVisitor : public InDepthQueryTreeVisitor<FuseFunctionsVisitor>
{
public:
explicit FuseFunctionsVisitor(const std::unordered_set<String> names_to_collect_)
: names_to_collect(names_to_collect_)
{}
void visitImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction() || !names_to_collect.contains(function_node->getFunctionName()))
return;
if (function_node->getResultType()->isNullable())
/// Do not apply to functions with Nullable result type, because `sumCount` handles it differently from `sum` and `avg`.
return;
const auto & argument_nodes = function_node->getArguments().getNodes();
if (argument_nodes.size() != 1)
/// Do not apply to `count()` without arguments or `count(*)`; only `count(x)` is supported.
return;
mapping[QueryTreeNodeWithHash(argument_nodes[0])].push_back(&node);
}
struct QueryTreeNodeWithHash
{
const QueryTreeNodePtr & node;
IQueryTreeNode::Hash hash;
explicit QueryTreeNodeWithHash(const QueryTreeNodePtr & node_)
: node(node_)
, hash(node->getTreeHash())
{}
bool operator==(const QueryTreeNodeWithHash & rhs) const
{
return hash == rhs.hash && node->isEqual(*rhs.node);
}
struct Hash
{
size_t operator() (const QueryTreeNodeWithHash & key) const { return key.hash.first ^ key.hash.second; }
};
};
/// argument -> list of sum/count/avg functions with this argument
std::unordered_map<QueryTreeNodeWithHash, std::vector<QueryTreeNodePtr *>, QueryTreeNodeWithHash::Hash> mapping;
private:
std::unordered_set<String> names_to_collect;
};
QueryTreeNodePtr createResolvedFunction(ContextPtr context, const String & name, DataTypePtr result_type, QueryTreeNodes arguments)
{
auto function_node = std::make_shared<FunctionNode>(name);
auto function = FunctionFactory::instance().get(name, context);
function_node->resolveAsFunction(std::move(function), result_type);
function_node->getArguments().getNodes() = std::move(arguments);
return function_node;
}
FunctionNodePtr createResolvedAggregateFunction(const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {})
{
auto function_node = std::make_shared<FunctionNode>(name);
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties);
function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType());
function_node->getArgumentsNode() = std::make_shared<ListNode>(QueryTreeNodes{argument});
return function_node;
}
QueryTreeNodePtr createTupleElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
{
return createResolvedFunction(context, "tupleElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
}
QueryTreeNodePtr createArrayElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
{
return createResolvedFunction(context, "arrayElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
}
void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context)
{
auto sum_count_result_type = std::dynamic_pointer_cast<const DataTypeTuple>(sum_count_node->getResultType());
if (!sum_count_result_type || sum_count_result_type->getElements().size() != 2)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected return type '{}' of function '{}', should be tuple of two elements",
sum_count_node->getResultType(), sum_count_node->getFunctionName());
}
String function_name = node->as<const FunctionNode &>().getFunctionName();
if (function_name == "sum")
{
assert(node->getResultType()->equals(*sum_count_result_type->getElement(0)));
node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 1);
}
else if (function_name == "count")
{
assert(node->getResultType()->equals(*sum_count_result_type->getElement(1)));
node = createTupleElementFunction(context, node->getResultType(), sum_count_node, 2);
}
else if (function_name == "avg")
{
auto sum_result = createTupleElementFunction(context, sum_count_result_type->getElement(0), sum_count_node, 1);
auto count_result = createTupleElementFunction(context, sum_count_result_type->getElement(1), sum_count_node, 2);
/// To avoid integer division by zero
auto count_float_result = createResolvedFunction(context, "toFloat64", std::make_shared<DataTypeFloat64>(), {count_result});
node = createResolvedFunction(context, "divide", node->getResultType(), {sum_result, count_float_result});
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported function '{}'", function_name);
}
}
FunctionNodePtr createFusedQuantilesNode(const std::vector<QueryTreeNodePtr *> nodes, const QueryTreeNodePtr & argument)
{
Array parameters;
parameters.reserve(nodes.size());
for (const auto * node : nodes)
{
const FunctionNode & function_node = (*node)->as<const FunctionNode &>();
const auto & function_name = function_node.getFunctionName();
const auto & parameter_nodes = function_node.getParameters().getNodes();
if (parameter_nodes.empty())
{
parameters.push_back(Float64(0.5)); /// default value
continue;
}
if (parameter_nodes.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function '{}' should have exactly one parameter", function_name);
const auto & constant_value = parameter_nodes.front()->getConstantValueOrNull();
if (!constant_value)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function '{}' should have constant parameter", function_name);
parameters.push_back(constant_value->getValue());
}
return createResolvedAggregateFunction("quantiles", argument, parameters);
}
void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
FuseFunctionsVisitor visitor({"sum", "count", "avg"});
visitor.visit(query_tree_node);
for (auto & [argument, nodes] : visitor.mapping)
{
if (nodes.size() < 2)
continue;
auto sum_count_node = createResolvedAggregateFunction("sumCount", argument.node);
for (auto * node : nodes)
{
assert(node);
replaceWithSumCount(*node, sum_count_node, context);
}
}
}
void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
FuseFunctionsVisitor visitor_quantile({"quantile"});
visitor_quantile.visit(query_tree_node);
for (auto & [argument, nodes] : visitor_quantile.mapping)
{
if (nodes.size() < 2)
continue;
auto quantiles_node = createFusedQuantilesNode(nodes, argument.node);
auto result_array_type = std::dynamic_pointer_cast<const DataTypeArray>(quantiles_node->getResultType());
if (!result_array_type)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected return type '{}' of function '{}', should be array",
quantiles_node->getResultType(), quantiles_node->getFunctionName());
}
for (size_t i = 0; i < nodes.size(); ++i)
{
*nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, i + 1);
}
}
}
}
void FuseFunctionsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
tryFuseSumCountAvg(query_tree_node, context);
tryFuseQuantiles(query_tree_node, context);
}
}


@ -0,0 +1,27 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/*
* This pass replaces several calls of aggregate functions of the same family with one call.
* The result is calculated only once thanks to common subexpression elimination (CSE).
*
* Replaces:
* `sum(x), count(x), avg(x)` with `sumCount(x).1, sumCount(x).2, sumCount(x).1 / toFloat64(sumCount(x).2)`
* `quantile(0.5)(x), quantile(0.9)(x)` with `quantiles(0.5, 0.9)(x)[1], quantiles(0.5, 0.9)(x)[2]`
*/
class FuseFunctionsPass final : public IQueryTreePass
{
public:
String getName() override { return "FuseFunctionsPass"; }
String getDescription() override { return "Replaces several calls of aggregate functions of the same family into one call"; }
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}
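In SQL terms, the rewrite this pass performs looks roughly as follows (a conceptual sketch; `t` and `x` are hypothetical):

``` sql
SET optimize_syntax_fuse_functions = 1;
-- A query written as:
SELECT sum(x), count(x), avg(x) FROM t;
-- is internally rewritten to (evaluated once thanks to CSE):
SELECT sumCount(x).1, sumCount(x).2, sumCount(x).1 / toFloat64(sumCount(x).2) FROM t;
```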


@ -33,18 +33,27 @@ public:
if (function_node->getFunctionName() == "count" && !first_argument_constant_literal.isNull())
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
}
else if (function_node->getFunctionName() == "sum" && first_argument_constant_literal.getType() == Field::Types::UInt64 &&
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
{
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function), std::move(result_type));
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
}
}
private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
auto function_result_type = function_node.getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type));
}
};
}

File diff suppressed because it is too large.


@ -13,6 +13,7 @@
#include <Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.h>
#include <Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.h>
#include <Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.h>
#include <Analyzer/Passes/FuseFunctionsPass.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
@ -66,7 +67,6 @@ public:
*
* TODO: Support _shard_num into shardNum() rewriting.
* TODO: Support logical expressions optimizer.
* TODO: Support fuse sum count optimize_fuse_sum_count_avg, optimize_syntax_fuse_functions.
* TODO: Support setting convert_query_to_cnf.
* TODO: Support setting optimize_using_constraints.
* TODO: Support setting optimize_substitute_columns.
@ -78,7 +78,6 @@ public:
* TODO: Support setting optimize_redundant_functions_in_order_by.
* TODO: Support setting optimize_monotonous_functions_in_order_by.
* TODO: Support setting optimize_if_transform_strings_to_enum.
* TODO: Support settings.optimize_syntax_fuse_functions.
* TODO: Support settings.optimize_or_like_chain.
* TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column).
*/
@ -191,6 +190,9 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<OrderByTupleEliminationPass>());
manager.addPass(std::make_unique<OrderByLimitByDuplicateEliminationPass>());
if (settings.optimize_syntax_fuse_functions)
manager.addPass(std::make_unique<FuseFunctionsPass>());
}
}


@ -5,6 +5,11 @@
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTFunction.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <Functions/FunctionHelpers.h>
#include <Analyzer/IdentifierNode.h>
#include <Analyzer/JoinNode.h>
#include <Analyzer/ArrayJoinNode.h>
@ -289,41 +294,28 @@ QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_nod
return result;
}
QueryTreeNodePtr getColumnSourceForJoinNodeWithUsing(const QueryTreeNodePtr & join_node)
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier)
{
QueryTreeNodePtr column_source_node = join_node;
const IDataType * current_type = compound_type.get();
while (true)
for (const auto & identifier_part : nested_identifier)
{
auto column_source_node_type = column_source_node->getNodeType();
if (column_source_node_type == QueryTreeNodeType::TABLE ||
column_source_node_type == QueryTreeNodeType::TABLE_FUNCTION ||
column_source_node_type == QueryTreeNodeType::QUERY ||
column_source_node_type == QueryTreeNodeType::UNION)
{
break;
}
else if (column_source_node_type == QueryTreeNodeType::ARRAY_JOIN)
{
auto & array_join_node = column_source_node->as<ArrayJoinNode &>();
column_source_node = array_join_node.getTableExpression();
continue;
}
else if (column_source_node_type == QueryTreeNodeType::JOIN)
{
auto & join_node_typed = column_source_node->as<JoinNode &>();
column_source_node = isRight(join_node_typed.getKind()) ? join_node_typed.getRightTableExpression() : join_node_typed.getLeftTableExpression();
continue;
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}",
column_source_node->getNodeTypeName());
}
while (const DataTypeArray * array = checkAndGetDataType<DataTypeArray>(current_type))
current_type = array->getNestedType().get();
const DataTypeTuple * tuple = checkAndGetDataType<DataTypeTuple>(current_type);
if (!tuple)
return false;
auto position = tuple->tryGetPositionByName(identifier_part);
if (!position)
return false;
current_type = tuple->getElements()[*position].get();
}
return column_source_node;
return true;
}
}


@ -31,9 +31,12 @@ QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);
*/
QueryTreeNodes buildTableExpressionsStack(const QueryTreeNodePtr & join_tree_node);
/** Get column source for JOIN node with USING.
* Example: SELECT id FROM test_table_1 AS t1 INNER JOIN test_table_2 AS t2 USING (id);
/** Returns true if nested identifier can be resolved from compound type.
* Compound type can be tuple or array of tuples.
*
* Example: Compound type: Tuple(nested_path Tuple(nested_path_2 UInt64)). Nested identifier: nested_path.nested_path_2.
* Result: true.
*/
QueryTreeNodePtr getColumnSourceForJoinNodeWithUsing(const QueryTreeNodePtr & join_node);
bool nestedIdentifierCanBeResolved(const DataTypePtr & compound_type, IdentifierView nested_identifier);
}


@ -70,6 +70,22 @@ TEST(Identifier, IdentifierBasics)
}
}
TEST(Identifier, IdentifierPushParts)
{
{
Identifier identifier;
identifier.push_back("value1");
ASSERT_EQ(identifier.getFullName(), "value1");
identifier.push_back("value2");
ASSERT_EQ(identifier.getFullName(), "value1.value2");
identifier.push_back("value3");
ASSERT_EQ(identifier.getFullName(), "value1.value2.value3");
ASSERT_FALSE(identifier.isEmpty());
}
}
TEST(Identifier, IdentifierPopParts)
{
{


@ -442,7 +442,7 @@ void BackupEntriesCollector::gatherTablesMetadata()
if (it != database_info.tables.end())
{
const auto & partitions = it->second.partitions;
if (partitions && !storage->supportsBackupPartition())
if (partitions && storage && !storage->supportsBackupPartition())
{
throw Exception(
ErrorCodes::CANNOT_BACKUP_TABLE,


@ -119,22 +119,27 @@ namespace ProfileEvents
namespace DB
{
ProgressOption toProgressOption(std::string progress)
{
boost::to_upper(progress);
if (progress == "OFF" || progress == "FALSE" || progress == "0" || progress == "NO")
return ProgressOption::OFF;
if (progress == "TTY" || progress == "ON" || progress == "TRUE" || progress == "1" || progress == "YES")
return ProgressOption::TTY;
if (progress == "ERR")
return ProgressOption::ERR;
if (progress == "DEFAULT")
return ProgressOption::DEFAULT;
throw boost::program_options::validation_error(boost::program_options::validation_error::invalid_option_value);
}
std::istream& operator>> (std::istream & in, ProgressOption & progress)
{
std::string token;
in >> token;
boost::to_upper(token);
if (token == "OFF" || token == "FALSE" || token == "0" || token == "NO")
progress = ProgressOption::OFF;
else if (token == "TTY" || token == "ON" || token == "TRUE" || token == "1" || token == "YES")
progress = ProgressOption::TTY;
else if (token == "ERR")
progress = ProgressOption::ERR;
else
throw boost::program_options::validation_error(boost::program_options::validation_error::invalid_option_value);
progress = toProgressOption(token);
return in;
}
@ -662,56 +667,62 @@ void ClientBase::initLogsOutputStream()
}
}
void ClientBase::initTtyBuffer(bool to_err)
void ClientBase::initTtyBuffer(ProgressOption progress)
{
if (!tty_buf)
if (tty_buf)
return;
if (progress == ProgressOption::OFF || (!is_interactive && progress == ProgressOption::DEFAULT))
{
static constexpr auto tty_file_name = "/dev/tty";
need_render_progress = false;
return;
}
/// Output all progress bar commands to terminal at once to avoid flicker.
/// This size is usually greater than the window size.
static constexpr size_t buf_size = 1024;
static constexpr auto tty_file_name = "/dev/tty";
if (!to_err)
/// Output all progress bar commands to terminal at once to avoid flicker.
/// This size is usually greater than the window size.
static constexpr size_t buf_size = 1024;
if (is_interactive || progress == ProgressOption::TTY)
{
std::error_code ec;
std::filesystem::file_status tty = std::filesystem::status(tty_file_name, ec);
if (!ec && exists(tty) && is_character_file(tty)
&& (tty.permissions() & std::filesystem::perms::others_write) != std::filesystem::perms::none)
{
std::error_code ec;
std::filesystem::file_status tty = std::filesystem::status(tty_file_name, ec);
if (!ec && exists(tty) && is_character_file(tty)
&& (tty.permissions() & std::filesystem::perms::others_write) != std::filesystem::perms::none)
try
{
try
{
tty_buf = std::make_unique<WriteBufferFromFile>(tty_file_name, buf_size);
tty_buf = std::make_unique<WriteBufferFromFile>(tty_file_name, buf_size);
/// It is possible that the terminal file has writeable permissions
/// but we cannot write anything there. Check it with invisible character.
tty_buf->write('\0');
tty_buf->next();
/// It is possible that the terminal file has writeable permissions
/// but we cannot write anything there. Check it with invisible character.
tty_buf->write('\0');
tty_buf->next();
return;
}
catch (const Exception & e)
{
if (tty_buf)
tty_buf.reset();
return;
}
catch (const Exception & e)
{
if (tty_buf)
tty_buf.reset();
if (e.code() != ErrorCodes::CANNOT_OPEN_FILE)
throw;
if (e.code() != ErrorCodes::CANNOT_OPEN_FILE)
throw;
/// It is normal if file exists, indicated as writeable but still cannot be opened.
/// Fallback to other options.
}
/// It is normal if file exists, indicated as writeable but still cannot be opened.
/// Fallback to other options.
}
}
if (stderr_is_a_tty)
{
tty_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDERR_FILENO, buf_size);
}
else
need_render_progress = false;
}
if (stderr_is_a_tty || progress == ProgressOption::ERR)
{
tty_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDERR_FILENO, buf_size);
}
else
need_render_progress = false;
}
void ClientBase::updateSuggest(const ASTPtr & ast)
@ -2324,7 +2335,7 @@ void ClientBase::init(int argc, char ** argv)
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
("query_id", po::value<std::string>(), "query_id")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::TTY, "tty"), "Print progress of queries execution - to TTY (default): tty|on|1|true|yes; to STDERR: err; OFF: off|0|false|no")
("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off")
("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
@ -2379,11 +2390,6 @@ void ClientBase::init(int argc, char ** argv)
parseAndCheckOptions(options_description, options, common_arguments);
po::notify(options);
if (options["progress"].as<ProgressOption>() == ProgressOption::OFF)
need_render_progress = false;
else
initTtyBuffer(options["progress"].as<ProgressOption>() == ProgressOption::ERR);
if (options.count("version") || options.count("V"))
{
showClientVersion();
@ -2437,6 +2443,9 @@ void ClientBase::init(int argc, char ** argv)
{
switch (options["progress"].as<ProgressOption>())
{
case DEFAULT:
config().setString("progress", "default");
break;
case OFF:
config().setString("progress", "off");
break;


@ -38,10 +38,12 @@ enum MultiQueryProcessingStage
enum ProgressOption
{
DEFAULT,
OFF,
TTY,
ERR,
};
ProgressOption toProgressOption(std::string progress);
std::istream& operator>> (std::istream & in, ProgressOption & progress);
void interruptSignalHandler(int signum);
@ -153,7 +155,6 @@ private:
void initOutputFormat(const Block & block, ASTPtr parsed_query);
void initLogsOutputStream();
void initTtyBuffer(bool to_err = false);
String prompt() const;
@ -168,6 +169,8 @@ protected:
static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context);
bool processMultiQueryFromFile(const String & file_name);
void initTtyBuffer(ProgressOption progress);
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
bool is_multiquery = false;
bool delayed_interactive = false;


@ -637,6 +637,8 @@
M(666, CANNOT_USE_CACHE) \
M(667, NOT_INITIALIZED) \
M(668, INVALID_STATE) \
M(669, UNKNOWN_NAMED_COLLECTION) \
M(670, NAMED_COLLECTION_ALREADY_EXISTS) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \


@ -12,6 +12,7 @@
#include <fmt/format.h>
namespace Poco { class Logger; }


@ -161,14 +161,9 @@ template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t DefaultHash64(T key)
{
union
{
T in;
DB::UInt64 out;
} u;
u.out = 0;
u.in = key;
return intHash64(u.out);
DB::UInt64 out {0};
std::memcpy(&out, &key, sizeof(T));
return intHash64(out);
}
@ -224,14 +219,9 @@ template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
{
union
{
T in;
DB::UInt64 out;
} u;
u.out = 0;
u.in = key;
return intHashCRC32(u.out, updated_value);
DB::UInt64 out {0};
std::memcpy(&out, &key, sizeof(T));
return intHashCRC32(out, updated_value);
}
template <typename T>
@ -446,14 +436,9 @@ struct IntHash32
}
else if constexpr (sizeof(T) <= sizeof(UInt64))
{
union
{
T in;
DB::UInt64 out;
} u;
u.out = 0;
u.in = key;
return intHash32<salt>(u.out);
DB::UInt64 out {0};
std::memcpy(&out, &key, sizeof(T));
return intHash32<salt>(out);
}
UNREACHABLE();


@ -1,15 +1,20 @@
#include <IO/WriteHelpers.h>
#include <Common/NamePrompter.h>
namespace DB::detail
namespace DB
{
void appendHintsMessageImpl(String & message, const std::vector<String> & hints)
String getHintsErrorMessageSuffix(const std::vector<String> & hints)
{
if (hints.empty())
{
return;
}
return {};
message += ". Maybe you meant: " + toString(hints);
return ". Maybe you meant: " + toString(hints);
}
void appendHintsMessage(String & message, const std::vector<String> & hints)
{
message += getHintsErrorMessageSuffix(hints);
}
}


@ -12,6 +12,7 @@
namespace DB
{
template <size_t MaxNumHints>
class NamePrompter
{
@ -90,10 +91,9 @@ private:
}
};
namespace detail
{
void appendHintsMessageImpl(String & message, const std::vector<String> & hints);
}
String getHintsErrorMessageSuffix(const std::vector<String> & hints);
void appendHintsMessage(String & error_message, const std::vector<String> & hints);
template <size_t MaxNumHints, typename Self>
class IHints
@ -106,10 +106,10 @@ public:
return prompter.getHints(name, getAllRegisteredNames());
}
void appendHintsMessage(String & message, const String & name) const
void appendHintsMessage(String & error_message, const String & name) const
{
auto hints = getHints(name);
detail::appendHintsMessageImpl(message, hints);
DB::appendHintsMessage(error_message, hints);
}
IHints() = default;


@ -34,7 +34,7 @@ using TestKeeperRequestPtr = std::shared_ptr<TestKeeperRequest>;
class TestKeeper final : public IKeeper
{
public:
TestKeeper(const zkutil::ZooKeeperArgs & args_);
explicit TestKeeper(const zkutil::ZooKeeperArgs & args_);
~TestKeeper() override;
bool isExpired() const override { return expired; }

View File

@ -156,7 +156,7 @@ public:
using Ptr = std::shared_ptr<ZooKeeper>;
using ErrorsList = std::initializer_list<Coordination::Error>;
ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
explicit ZooKeeper(const ZooKeeperArgs & args_, std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
/** Config of the form:
<zookeeper>

View File

@ -240,7 +240,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
{
UInt32 n_jobs_processing = decomp_async_job_map.size();
auto n_jobs_processing = decomp_async_job_map.size();
std::map<UInt32, qpl_job *>::iterator it = decomp_async_job_map.begin();
while (n_jobs_processing)

View File

@ -116,9 +116,10 @@ namespace MySQLReplication
if (!query.starts_with("XA COMMIT"))
transaction_complete = false;
}
else if (query.starts_with("SAVEPOINT"))
else if (query.starts_with("SAVEPOINT") || query.starts_with("ROLLBACK")
|| query.starts_with("RELEASE SAVEPOINT"))
{
throw ReplicationError("ParseQueryEvent: Unsupported query event:" + query, ErrorCodes::LOGICAL_ERROR);
typ = QUERY_SAVEPOINT;
}
}
@ -941,6 +942,8 @@ namespace MySQLReplication
{
case QUERY_EVENT_MULTI_TXN_FLAG:
case QUERY_EVENT_XA:
/// Ignore queries that have no impact on the data.
case QUERY_SAVEPOINT:
{
event = std::make_shared<DryRunEvent>(std::move(query->header));
break;

View File

@ -368,7 +368,8 @@ namespace MySQLReplication
{
QUERY_EVENT_DDL = 0,
QUERY_EVENT_MULTI_TXN_FLAG = 1,
QUERY_EVENT_XA = 2
QUERY_EVENT_XA = 2,
QUERY_SAVEPOINT = 3,
};
class QueryEvent : public EventBase

View File

@ -523,7 +523,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_syntax_fuse_functions, false, "Not ready for production, do not use. Allow apply syntax optimisation: fuse aggregate functions", 0) \
M(Bool, optimize_fuse_sum_count_avg, false, "Not ready for production, do not use. Fuse functions `sum, avg, count` with identical arguments into one `sumCount` (`optimize_syntax_fuse_functions should be enabled)", 0) \
M(Bool, optimize_fuse_sum_count_avg, false, "Replace calls of functions `sum`, `avg`, `count` with identical arguments into one `sumCount`", 0) \
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
@ -576,6 +576,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimisation", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
@ -613,7 +614,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
\
M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \
\
M(Bool, use_structure_from_insertion_table_in_table_functions, false, "Use structure from insertion table instead of schema inference from data", 0) \
M(UInt64, use_structure_from_insertion_table_in_table_functions, 2, "Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto", 0) \
\
M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \
M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \
@ -658,6 +659,11 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, multiple_joins_try_to_keep_original_names, false, "Do not add aliases to top level expression list on multiple joins rewrite", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, optimize_sorting_by_input_stream_properties, true, "Optimize sorting by sorting properties of input stream", 0) \
M(UInt64, insert_keeper_max_retries, 0, "Max retries for keeper operations during insert", 0) \
M(UInt64, insert_keeper_retry_initial_backoff_ms, 100, "Initial backoff timeout for keeper operations during insert", 0) \
M(UInt64, insert_keeper_retry_max_backoff_ms, 10000, "Max backoff timeout for keeper operations during insert", 0) \
M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]", 0) \
M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
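
The insert_keeper_* settings above parameterize retries of Keeper operations during INSERT. Assuming a plain capped exponential backoff (the actual ClickHouse retry helper lives elsewhere and also supports fault injection via the probability/seed settings), the loop they describe looks roughly like this:

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <thread>

/// Retry up to max_retries times, doubling the pause from initial_backoff_ms
/// up to max_backoff_ms. Illustrative sketch only.
template <typename Op>
bool retryWithBackoff(Op && op, uint64_t max_retries,
                      uint64_t initial_backoff_ms, uint64_t max_backoff_ms)
{
    uint64_t backoff_ms = initial_backoff_ms;
    for (uint64_t attempt = 0; ; ++attempt)
    {
        if (op())
            return true;
        if (attempt >= max_retries)
            return false;
        std::this_thread::sleep_for(std::chrono::milliseconds(backoff_ms));
        backoff_ms = std::min(backoff_ms * 2, max_backoff_ms);
    }
}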

View File

@ -78,6 +78,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},

View File

@ -355,27 +355,33 @@ private:
#if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
/// Please keep the below log messages in-sync with the ones in programs/server/Server.cpp
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
{
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read."
" (calculated checksum: {})", calculated_binary_hash);
}
else if (calculated_binary_hash == daemon.stored_binary_hash)
{
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
LOG_FATAL(log, "Integrity check of the executable skipped because the reference checksum could not be read.");
}
else
{
LOG_FATAL(log, "Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable was changed just after startup;"
" - the executable was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - a logical error in the code."
, calculated_binary_hash, daemon.stored_binary_hash);
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (calculated_binary_hash == daemon.stored_binary_hash)
{
LOG_FATAL(log, "Integrity check of the executable successfully passed (checksum: {})", calculated_binary_hash);
}
else
{
LOG_FATAL(
log,
"Calculated checksum of the executable ({0}) does not correspond"
" to the reference checksum stored in the executable ({1})."
" This may indicate one of the following:"
" - the executable was changed just after startup;"
" - the executable was corrupted on disk due to faulty hardware;"
" - the loaded executable was corrupted in memory due to faulty hardware;"
" - the file was intentionally modified;"
" - a logical error in the code.",
calculated_binary_hash,
daemon.stored_binary_hash);
}
}
#endif

View File

@ -116,6 +116,15 @@ template <typename A, typename B> struct ResultOfModulo
using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
};
template <typename A, typename B> struct ResultOfPositiveModulo
{
/// function positive_modulo always returns a non-negative number.
static constexpr size_t size_of_result = sizeof(B);
using Type0 = typename Construct<false, false, size_of_result>::Type;
using Type = std::conditional_t<std::is_floating_point_v<A> || std::is_floating_point_v<B>, Float64, Type0>;
};
template <typename A, typename B> struct ResultOfModuloLegacy
{
using Type0 = typename Construct<is_signed_v<A> || is_signed_v<B>, false, sizeof(B)>::Type;

View File

@ -55,7 +55,7 @@ DataTypePtr convertMySQLDataType(MultiEnum<MySQLDataTypesSupport> type_support,
else
res = std::make_shared<DataTypeInt16>();
}
else if (type_name == "int" || type_name == "mediumint")
else if (type_name == "int" || type_name == "mediumint" || type_name == "integer")
{
if (is_unsigned)
res = std::make_shared<DataTypeUInt32>();

View File

@ -177,6 +177,7 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(co
if (create.getTable() != table_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for temporary table {}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(table_name));
chassert(storage);
storage->adjustCreateQueryForBackup(create_table_query);
res.emplace_back(create_table_query, storage);
}

View File

@ -1214,6 +1214,7 @@ DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, cons
String table_name = unescapeForFileName(escaped_table_name);
if (!filter(table_name))
continue;
String zk_metadata;
if (!zookeeper->tryGet(zookeeper_path + "/metadata/" + escaped_table_name, zk_metadata))
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Metadata for table {} was not found in ZooKeeper", table_name);
@ -1233,6 +1234,10 @@ DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, cons
if (storage)
storage->adjustCreateQueryForBackup(create_table_query);
}
/// `storage` is allowed to be null here. In this case it means that this storage exists on other replicas
/// but it has not been created on this replica yet.
res.emplace_back(create_table_query, storage);
}

View File

@ -329,6 +329,10 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseWithOwnTablesBase::getTablesF
for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next())
{
auto storage = it->table();
if (!storage)
continue; /// Probably the table has been just dropped.
auto create_table_query = tryGetCreateTableQuery(it->name(), local_context);
if (!create_table_query)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for table {}.{}", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name()));
@ -337,7 +341,6 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseWithOwnTablesBase::getTablesF
if (create.getTable() != it->name())
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for table {}.{}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name()));
auto storage = it->table();
storage->adjustCreateQueryForBackup(create_table_query);
res.emplace_back(create_table_query, storage);
}

View File

@ -464,6 +464,9 @@ bool tryInferDate(const std::string_view & field)
bool tryInferDateTime(const std::string_view & field, const FormatSettings & settings)
{
if (field.empty())
return false;
ReadBufferFromString buf(field);
Float64 tmp_float;
/// Check if it's just a number, and if so, don't try to infer DateTime from it,

View File

@ -178,4 +178,32 @@ struct ModuloLegacyImpl : ModuloImpl<A, B>
using ResultType = typename NumberTraits::ResultOfModuloLegacy<A, B>::Type;
};
template <typename A, typename B>
struct PositiveModuloImpl : ModuloImpl<A, B>
{
using OriginResultType = typename ModuloImpl<A, B>::ResultType;
using ResultType = typename NumberTraits::ResultOfPositiveModulo<A, B>::Type;
template <typename Result = ResultType>
static inline Result apply(A a, B b)
{
auto res = ModuloImpl<A, B>::template apply<OriginResultType>(a, b);
if constexpr (is_signed_v<A>)
{
if (res < 0)
{
if constexpr (is_unsigned_v<B>)
res += static_cast<OriginResultType>(b);
else
{
if (b == std::numeric_limits<B>::lowest())
throw Exception("Division by the most negative number", ErrorCodes::ILLEGAL_DIVISION);
res += b >= 0 ? static_cast<OriginResultType>(b) : static_cast<OriginResultType>(-b);
}
}
}
return static_cast<ResultType>(res);
}
};
}
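
A worked illustration of the semantics in standalone form (long long stands in for the templated integer types; the overflow guard for the most negative divisor is omitted):

#include <cassert>

/// Mirror of the branch above: lift a negative remainder into [0, |b|).
long long positiveModulo(long long a, long long b)
{
    long long res = a % b;
    if (res < 0)
        res += (b >= 0 ? b : -b);
    return res;
}

int main()
{
    assert(-3 % 10 == -3);                /// plain modulo keeps the sign of the dividend
    assert(positiveModulo(-3, 10) == 7);  /// positive_modulo lifts it by |b|
    assert(positiveModulo(7, 10) == 7);   /// non-negative results are unchanged
}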

View File

@ -131,50 +131,53 @@ public:
using ResultDataType = Switch<
/// Decimal cases
Case<!allow_decimal && (IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>), InvalidType>,
Case<IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType> && UseLeftDecimal<LeftDataType, RightDataType>, LeftDataType>,
Case<
IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType> && UseLeftDecimal<LeftDataType, RightDataType>,
LeftDataType>,
Case<IsDataTypeDecimal<LeftDataType> && IsDataTypeDecimal<RightDataType>, RightDataType>,
Case<IsDataTypeDecimal<LeftDataType> && IsIntegralOrExtended<RightDataType>, LeftDataType>,
Case<IsDataTypeDecimal<RightDataType> && IsIntegralOrExtended<LeftDataType>, RightDataType>,
/// e.g Decimal +-*/ Float, least(Decimal, Float), greatest(Decimal, Float) = Float64
Case<IsOperation<Operation>::allow_decimal && IsDataTypeDecimal<LeftDataType> && IsFloatingPoint<RightDataType>,
DataTypeFloat64>,
Case<IsOperation<Operation>::allow_decimal && IsDataTypeDecimal<RightDataType> && IsFloatingPoint<LeftDataType>,
DataTypeFloat64>,
Case<IsOperation<Operation>::allow_decimal && IsDataTypeDecimal<LeftDataType> && IsFloatingPoint<RightDataType>, DataTypeFloat64>,
Case<IsOperation<Operation>::allow_decimal && IsDataTypeDecimal<RightDataType> && IsFloatingPoint<LeftDataType>, DataTypeFloat64>,
Case<IsOperation<Operation>::bit_hamming_distance && IsIntegral<LeftDataType> && IsIntegral<RightDataType>,
DataTypeUInt8>,
Case<IsOperation<Operation>::bit_hamming_distance && IsIntegral<LeftDataType> && IsIntegral<RightDataType>, DataTypeUInt8>,
/// Decimal <op> Real is not supported (traditional DBs convert Decimal <op> Real to Real)
Case<IsDataTypeDecimal<LeftDataType> && !IsIntegralOrExtendedOrDecimal<RightDataType>, InvalidType>,
Case<IsDataTypeDecimal<RightDataType> && !IsIntegralOrExtendedOrDecimal<LeftDataType>, InvalidType>,
/// number <op> number -> see corresponding impl
Case<!IsDateOrDateTime<LeftDataType> && !IsDateOrDateTime<RightDataType>,
DataTypeFromFieldType<typename Op::ResultType>>,
Case<!IsDateOrDateTime<LeftDataType> && !IsDateOrDateTime<RightDataType>, DataTypeFromFieldType<typename Op::ResultType>>,
/// Date + Integral -> Date
/// Integral + Date -> Date
Case<IsOperation<Operation>::plus, Switch<
Case<IsIntegral<RightDataType>, LeftDataType>,
Case<IsIntegral<LeftDataType>, RightDataType>>>,
Case<
IsOperation<Operation>::plus,
Switch<Case<IsIntegral<RightDataType>, LeftDataType>, Case<IsIntegral<LeftDataType>, RightDataType>>>,
/// Date - Date -> Int32
/// Date - Integral -> Date
Case<IsOperation<Operation>::minus, Switch<
Case<std::is_same_v<LeftDataType, RightDataType>, DataTypeInt32>,
Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, LeftDataType>>>,
Case<
IsOperation<Operation>::minus,
Switch<
Case<std::is_same_v<LeftDataType, RightDataType>, DataTypeInt32>,
Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, LeftDataType>>>,
/// least(Date, Date) -> Date
/// greatest(Date, Date) -> Date
Case<std::is_same_v<LeftDataType, RightDataType> && (IsOperation<Operation>::least || IsOperation<Operation>::greatest),
Case<
std::is_same_v<LeftDataType, RightDataType> && (IsOperation<Operation>::least || IsOperation<Operation>::greatest),
LeftDataType>,
/// Date % Int32 -> Int32
/// Date % Float -> Float64
Case<IsOperation<Operation>::modulo, Switch<
Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, RightDataType>,
Case<IsDateOrDateTime<LeftDataType> && IsFloatingPoint<RightDataType>, DataTypeFloat64>>>>;
Case<
IsOperation<Operation>::modulo || IsOperation<Operation>::positive_modulo,
Switch<
Case<IsDateOrDateTime<LeftDataType> && IsIntegral<RightDataType>, RightDataType>,
Case<IsDateOrDateTime<LeftDataType> && IsFloatingPoint<RightDataType>, DataTypeFloat64>>>>;
};
}
@ -1176,8 +1179,9 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override
{
return ((IsOperation<Op>::div_int || IsOperation<Op>::modulo) && !arguments[1].is_const)
|| (IsOperation<Op>::div_floating && (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type)));
return ((IsOperation<Op>::div_int || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo) && !arguments[1].is_const)
|| (IsOperation<Op>::div_floating
&& (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type)));
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
@ -2080,7 +2084,7 @@ public:
/// Check the case when operation is divide, intDiv or modulo and denominator is Nullable(Something).
/// For divide operation we should check only Nullable(Decimal), because only this case can throw division by zero error.
bool division_by_nullable = !arguments[0].type->onlyNull() && !arguments[1].type->onlyNull() && arguments[1].type->isNullable()
&& (IsOperation<Op>::div_int || IsOperation<Op>::modulo
&& (IsOperation<Op>::div_int || IsOperation<Op>::modulo || IsOperation<Op>::positive_modulo
|| (IsOperation<Op>::div_floating
&& (isDecimalOrNullableDecimal(arguments[0].type) || isDecimalOrNullableDecimal(arguments[1].type))));

View File

@ -2828,6 +2828,31 @@ private:
};
}
#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \
case IntervalKind::INTERVAL_KIND: \
return createFunctionAdaptor(FunctionConvert<DataTypeInterval, NameToInterval##INTERVAL_KIND, PositiveMonotonicity>::create(), from_type);
static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind)
{
switch (kind)
{
GENERATE_INTERVAL_CASE(Nanosecond)
GENERATE_INTERVAL_CASE(Microsecond)
GENERATE_INTERVAL_CASE(Millisecond)
GENERATE_INTERVAL_CASE(Second)
GENERATE_INTERVAL_CASE(Minute)
GENERATE_INTERVAL_CASE(Hour)
GENERATE_INTERVAL_CASE(Day)
GENERATE_INTERVAL_CASE(Week)
GENERATE_INTERVAL_CASE(Month)
GENERATE_INTERVAL_CASE(Quarter)
GENERATE_INTERVAL_CASE(Year)
}
throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()};
}
#undef GENERATE_INTERVAL_CASE
template <typename ToDataType>
requires IsDataTypeDecimal<ToDataType>
WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const
@ -3853,6 +3878,8 @@ private:
return createObjectWrapper(from_type, checkAndGetDataType<DataTypeObject>(to_type.get()));
case TypeIndex::AggregateFunction:
return createAggregateFunctionWrapper(from_type, checkAndGetDataType<DataTypeAggregateFunction>(to_type.get()));
case TypeIndex::Interval:
return createIntervalWrapper(from_type, checkAndGetDataType<DataTypeInterval>(to_type.get())->getKind());
default:
break;
}
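
GENERATE_INTERVAL_CASE stamps out one switch case per IntervalKind; the same trick in miniature, with hypothetical names:

#include <string>

enum class Unit { Second, Minute, Hour };

#define GENERATE_UNIT_CASE(UNIT) \
    case Unit::UNIT: return "Interval" #UNIT;

std::string unitName(Unit unit)
{
    switch (unit)
    {
        GENERATE_UNIT_CASE(Second)
        GENERATE_UNIT_CASE(Minute)
        GENERATE_UNIT_CASE(Hour)
    }
    return "Unknown";
}
#undef GENERATE_UNIT_CASE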

View File

@ -30,7 +30,9 @@ DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & argum
REGISTER_FUNCTION(StringArray)
{
factory.registerFunction<FunctionExtractAll>();
factory.registerFunction<FunctionAlphaTokens>();
factory.registerFunction<FunctionSplitByAlpha>();
factory.registerAlias("splitByAlpha", FunctionSplitByAlpha::name);
factory.registerFunction<FunctionSplitByNonAlpha>();
factory.registerFunction<FunctionSplitByWhitespace>();
factory.registerFunction<FunctionSplitByChar>();

View File

@ -32,12 +32,12 @@ namespace ErrorCodes
/** Functions that split strings into an array of strings or vice versa.
*
* splitByChar(sep, s)
* splitByString(sep, s)
* splitByRegexp(regexp, s)
* splitByChar(sep, s[, max_substrings])
* splitByString(sep, s[, max_substrings])
* splitByRegexp(regexp, s[, max_substrings])
*
* splitByWhitespace(s) - split the string by whitespace characters
* splitByNonAlpha(s) - split the string by whitespace and punctuation characters
* splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
* splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
*
* extractAll(s, regexp) - select from the string the subsequences corresponding to the regexp.
* - first subpattern, if regexp has subpattern;
@ -48,7 +48,7 @@ namespace ErrorCodes
* arrayStringConcat(arr, delimiter)
* - join an array of strings into one string via a separator.
*
* alphaTokens(s[, max_substrings]) - select from the string the subsequences matching `[a-zA-Z]+`.
* alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
*
* URL functions are located separately.
*/
@ -59,7 +59,7 @@ using Pos = const char *;
/// Substring generators. All of them have a common interface.
class AlphaTokensImpl
class SplitByAlphaImpl
{
private:
Pos pos;
@ -70,13 +70,16 @@ public:
static constexpr auto name = "alphaTokens";
static String getName() { return name; }
static bool isVariadic() { return false; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 1; }
static size_t getNumberOfArguments() { return 0; }
/// Check the type of the function's arguments.
static void checkArguments(const DataTypes & arguments)
{
if (arguments.empty() || arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -98,6 +101,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 1;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
@ -130,12 +139,15 @@ public:
static constexpr auto name = "splitByNonAlpha";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
/// Check the type of the function's arguments.
static void checkArguments(const DataTypes & arguments)
{
if (arguments.empty() || arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -157,6 +169,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 1;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
@ -189,12 +207,15 @@ public:
static constexpr auto name = "splitByWhitespace";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 1; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
/// Check the type of the function's arguments.
static void checkArguments(const DataTypes & arguments)
{
if (arguments.empty() || arguments.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
if (!isString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -216,6 +237,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 1;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
@ -242,10 +269,7 @@ class SplitByCharImpl
private:
Pos pos;
Pos end;
char sep;
std::optional<UInt64> max_split;
UInt64 curr_split = 0;
public:
static constexpr auto name = "splitByChar";
@ -268,13 +292,6 @@ public:
if (!isString(arguments[1]))
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". Must be String.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 3 && !isNativeInteger(arguments[2]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument for function '{}' must be integer, got '{}' instead",
getName(),
arguments[2]->getName());
}
void init(const ColumnsWithTypeAndName & arguments)
@ -292,39 +309,6 @@ public:
throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte.", ErrorCodes::BAD_ARGUMENTS);
sep = sep_str[0];
if (arguments.size() > 2)
{
if (!((max_split = getMaxSplit<UInt8>(arguments[2]))
|| (max_split = getMaxSplit<Int8>(arguments[2]))
|| (max_split = getMaxSplit<UInt16>(arguments[2]))
|| (max_split = getMaxSplit<Int16>(arguments[2]))
|| (max_split = getMaxSplit<UInt32>(arguments[2]))
|| (max_split = getMaxSplit<Int32>(arguments[2]))
|| (max_split = getMaxSplit<UInt64>(arguments[2]))
|| (max_split = getMaxSplit<Int64>(arguments[2]))))
{
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of third argument of function {}",
arguments[2].column->getName(),
getName());
}
}
}
template <typename DataType>
std::optional<UInt64> getMaxSplit(const ColumnWithTypeAndName & argument)
{
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
if (!col)
return std::nullopt;
auto value = col->template getValue<DataType>();
if (value < 0)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of third argument of function {}", argument.column->getName(), getName());
return value;
}
/// Returns the position of the argument, that is the column of strings
@ -333,11 +317,16 @@ public:
return 1;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 2;
}
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
curr_split = 0;
}
bool get(Pos & token_begin, Pos & token_end)
@ -346,19 +335,12 @@ public:
return false;
token_begin = pos;
if (unlikely(max_split && curr_split >= *max_split))
{
token_end = end;
pos = nullptr;
return true;
}
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
if (pos)
{
token_end = pos;
++pos;
++curr_split;
}
else
token_end = end;
@ -379,8 +361,8 @@ private:
public:
static constexpr auto name = "splitByString";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const DataTypes & arguments)
{
@ -405,6 +387,12 @@ public:
return 1;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 2;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
@ -454,12 +442,13 @@ private:
Pos pos;
Pos end;
public:
static constexpr auto name = "splitByRegexp";
static String getName() { return name; }
static bool isVariadic() { return false; }
static size_t getNumberOfArguments() { return 2; }
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
/// Check the type of function arguments.
static void checkArguments(const DataTypes & arguments)
@ -479,7 +468,6 @@ public:
if (!col->getValue<String>().empty())
re = std::make_shared<Regexps::Regexp>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
}
/// Returns the position of the argument that is the column of strings
@ -488,6 +476,12 @@ public:
return 1;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return 2;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
@ -573,6 +567,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return std::nullopt;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
@ -630,6 +630,15 @@ public:
{
Generator::checkArguments(arguments);
const auto max_substrings_pos = Generator::getMaxSubstringsArgumentPosition();
if (max_substrings_pos && *max_substrings_pos < arguments.size() && !isNativeInteger(arguments[*max_substrings_pos]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"{}-th argument for function '{}' must be integer, got '{}' instead",
*max_substrings_pos + 1,
getName(),
arguments[*max_substrings_pos]->getName());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
@ -639,6 +648,10 @@ public:
generator.init(arguments);
const auto & array_argument = arguments[generator.getStringsArgumentPosition()];
/// Whether we need to limit the number of tokens returned by Generator::get.
/// If max_substrings is std::nullopt, no limit is applied.
auto max_substrings = getMaxSubstrings(arguments);
const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
const ColumnConst * col_const_str =
checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
@ -672,9 +685,8 @@ public:
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
generator.set(pos, end);
size_t j = 0;
while (generator.get(token_begin, token_end))
while (generator.get(token_begin, token_end) && !(max_substrings && j >= *max_substrings))
{
size_t token_size = token_end - token_begin;
@ -702,7 +714,7 @@ public:
Pos token_begin = nullptr;
Pos token_end = nullptr;
while (generator.get(token_begin, token_end))
while (generator.get(token_begin, token_end) && !(max_substrings && dst.size() >= *max_substrings))
dst.push_back(String(token_begin, token_end - token_begin));
return result_type->createColumnConst(col_const_str->size(), dst);
@ -713,6 +725,47 @@ public:
+ " of arguments of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
private:
template <typename DataType>
std::optional<Int64> getMaxSubstringsImpl(const ColumnWithTypeAndName & argument) const
{
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
if (!col)
return {};
auto value = col->template getValue<DataType>();
return static_cast<Int64>(value);
}
std::optional<size_t> getMaxSubstrings(const ColumnsWithTypeAndName & arguments) const
{
const auto pos = Generator::getMaxSubstringsArgumentPosition();
if (!pos)
return std::nullopt;
if (*pos >= arguments.size())
return std::nullopt;
std::optional<Int64> max_substrings;
if (!((max_substrings = getMaxSubstringsImpl<UInt8>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int8>(arguments[*pos]))
|| (max_substrings = getMaxSubstringsImpl<UInt16>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int16>(arguments[*pos]))
|| (max_substrings = getMaxSubstringsImpl<UInt32>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int32>(arguments[*pos]))
|| (max_substrings = getMaxSubstringsImpl<UInt64>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int64>(arguments[*pos]))))
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {}, which is {}-th argument of function {}",
arguments[*pos].column->getName(),
*pos + 1,
getName());
/// If max_substrings is negative or zero, the string is tokenized without a limit,
/// which is equivalent to omitting the max_substrings argument.
if (max_substrings && *max_substrings <= 0)
return std::nullopt;
return *max_substrings;
}
};
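
The net effect of the new max_substrings handling, as a standalone sketch (std::string in place of ClickHouse columns). Note the behaviour change relative to the old splitByChar code above: tokens past the limit are now dropped, whereas the old implementation extended the last token to the end of the string:

#include <optional>
#include <string>
#include <vector>

/// Split `s` by `sep`, keeping at most max_substrings tokens if set;
/// non-positive limits mean "no limit", matching getMaxSubstrings above.
std::vector<std::string> splitByCharLimited(char sep, const std::string & s,
                                            std::optional<long long> max_substrings)
{
    if (max_substrings && *max_substrings <= 0)
        max_substrings.reset();

    std::vector<std::string> tokens;
    size_t begin = 0;
    while (!(max_substrings && tokens.size() >= static_cast<size_t>(*max_substrings)))
    {
        size_t pos = s.find(sep, begin);
        if (pos == std::string::npos)
        {
            tokens.emplace_back(s.substr(begin));
            break;
        }
        tokens.emplace_back(s.substr(begin, pos - begin));
        begin = pos + 1;
    }
    return tokens;
}

/// splitByCharLimited('=', "a=b=c", 2) yields {"a", "b"}.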
@ -884,7 +937,7 @@ public:
};
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl>;
using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;
using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;
using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;

View File

@ -15,6 +15,7 @@ template <typename, typename> struct DivideIntegralOrZeroImpl;
template <typename, typename> struct LeastBaseImpl;
template <typename, typename> struct GreatestBaseImpl;
template <typename, typename> struct ModuloImpl;
template <typename, typename> struct PositiveModuloImpl;
template <typename, typename> struct EqualsOp;
template <typename, typename> struct NotEqualsOp;
template <typename, typename> struct LessOrEqualsOp;
@ -53,6 +54,7 @@ struct IsOperation
static constexpr bool div_int = IsSameOperation<Op, DivideIntegralImpl>::value;
static constexpr bool div_int_or_zero = IsSameOperation<Op, DivideIntegralOrZeroImpl>::value;
static constexpr bool modulo = IsSameOperation<Op, ModuloImpl>::value;
static constexpr bool positive_modulo = IsSameOperation<Op, PositiveModuloImpl>::value;
static constexpr bool least = IsSameOperation<Op, LeastBaseImpl>::value;
static constexpr bool greatest = IsSameOperation<Op, GreatestBaseImpl>::value;

View File

@ -38,6 +38,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return std::nullopt;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{

View File

@ -37,6 +37,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return std::nullopt;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{

View File

@ -35,6 +35,13 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return std::nullopt;
}
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
/// Called for each next string.

View File

@ -37,6 +37,12 @@ public:
return 0;
}
/// Returns the position of the possible max_substrings argument. std::nullopt means the max_substrings argument is disabled in the current function.
static std::optional<size_t> getMaxSubstringsArgumentPosition()
{
return std::nullopt;
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{

View File

@ -0,0 +1,125 @@
#include <DataTypes/DataTypeDateTime.h>
#include <Functions/IFunction.h>
#include <Core/DecimalFunctions.h>
#include <Functions/FunctionFactory.h>
#include <Core/Field.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/// Get the UTC time. (It is a constant, it is evaluated once for the entire query.)
class ExecutableFunctionUTCTimestamp : public IExecutableFunction
{
public:
explicit ExecutableFunctionUTCTimestamp(time_t time_) : time_value(time_) {}
String getName() const override { return "UTCTimestamp"; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
return DataTypeDateTime().createColumnConst(
input_rows_count,
static_cast<UInt64>(time_value));
}
private:
time_t time_value;
};
class FunctionBaseUTCTimestamp : public IFunctionBase
{
public:
explicit FunctionBaseUTCTimestamp(time_t time_, DataTypes argument_types_, DataTypePtr return_type_)
: time_value(time_), argument_types(std::move(argument_types_)), return_type(std::move(return_type_)) {}
String getName() const override { return "UTCTimestamp"; }
const DataTypes & getArgumentTypes() const override
{
return argument_types;
}
const DataTypePtr & getResultType() const override
{
return return_type;
}
ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override
{
return std::make_unique<ExecutableFunctionUTCTimestamp>(time_value);
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
private:
time_t time_value;
DataTypes argument_types;
DataTypePtr return_type;
};
class UTCTimestampOverloadResolver : public IFunctionOverloadResolver
{
public:
static constexpr auto name = "UTCTimestamp";
String getName() const override { return name; }
bool isDeterministic() const override { return false; }
bool isVariadic() const override { return false; }
size_t getNumberOfArguments() const override { return 0; }
static FunctionOverloadResolverPtr create(ContextPtr) { return std::make_unique<UTCTimestampOverloadResolver>(); }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!arguments.empty())
{
throw Exception("Arguments size of function " + getName() + " should be 0", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
return std::make_shared<DataTypeDateTime>();
}
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &) const override
{
if (!arguments.empty())
{
throw Exception("Arguments size of function " + getName() + " should be 0", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
return std::make_unique<FunctionBaseUTCTimestamp>(time(nullptr), DataTypes(), std::make_shared<DataTypeDateTime>("UTC"));
}
};
}
/// UTC_timestamp for MySQL interface support
REGISTER_FUNCTION(UTCTimestamp)
{
factory.registerFunction<UTCTimestampOverloadResolver>({
R"(
Returns the current date and time at the moment of query analysis. The function is a constant expression.
Same as `now('UTC')`. Was added only for MySQL support. `now` is preferred.
Example:
[example:typical]
)",
Documentation::Examples{
{"typical", "SELECT UTCTimestamp();"}},
Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
factory.registerAlias("UTC_timestamp", UTCTimestampOverloadResolver::name, FunctionFactory::CaseInsensitive);
}
}
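
The three-class arrangement above (overload resolver, function base, executable function) exists so that time(nullptr) is read once, when the function is built during query analysis, and the captured value is then repeated for every row. A standalone sketch of that capture-at-build idea (names hypothetical):

#include <ctime>
#include <vector>

/// The clock is read once at construction, then reused for every row.
class ConstantNow
{
public:
    ConstantNow() : captured(std::time(nullptr)) {}

    std::vector<std::time_t> execute(size_t rows) const
    {
        return std::vector<std::time_t>(rows, captured);  /// one value, repeated
    }

private:
    std::time_t captured;
};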

View File

@ -0,0 +1,59 @@
#include <Common/randomSeed.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsRandom.h>
#include <pcg-random/pcg_random.hpp>
namespace DB
{
namespace
{
struct CanonicalRandImpl
{
static void execute(char * output, size_t size)
{
pcg64_fast rng1(randomSeed());
pcg64_fast rng2(randomSeed());
std::uniform_real_distribution<Float64> distribution1(min, max);
std::uniform_real_distribution<Float64> distribution2(min, max);
for (const char * end = output + size; output < end; output += 16)
{
unalignedStore<Float64>(output, distribution1(rng1));
unalignedStore<Float64>(output + 8, distribution2(rng2));
}
}
/// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
private:
static constexpr Float64 min = 0;
static constexpr Float64 max = 1;
};
struct NameCanonicalRand
{
static constexpr auto name = "canonicalRand";
};
class FunctionCanonicalRand : public FunctionRandomImpl<CanonicalRandImpl, Float64, NameCanonicalRand>
{
public:
static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionCanonicalRand>(); }
};
}
REGISTER_FUNCTION(CanonicalRand)
{
factory.registerFunction<FunctionCanonicalRand>({
R"(
The function generates pseudo-random results: independent, identically distributed values uniform on [0, 1).
Non-deterministic. The return type is Float64.
)",
Documentation::Examples{{"canonicalRand", "SELECT canonicalRand()"}},
Documentation::Categories{"Mathematical"}});
}
}
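
A hedged sketch of the interleaved two-stream fill, with std::mt19937_64 standing in for pcg64_fast. The original writes 16 bytes per iteration unconditionally because PaddedPODArray guarantees writable bytes past the end; a plain std::vector does not, so the sketch handles the tail explicitly:

#include <random>
#include <vector>

/// Fill `out` with values uniform on [0, 1), two independent generator
/// streams interleaved so each loop iteration produces two doubles.
void fillCanonical(std::vector<double> & out)
{
    std::mt19937_64 rng1{std::random_device{}()};
    std::mt19937_64 rng2{std::random_device{}()};
    std::uniform_real_distribution<double> dist(0.0, 1.0);

    size_t i = 0;
    for (; i + 2 <= out.size(); i += 2)
    {
        out[i] = dist(rng1);
        out[i + 1] = dist(rng2);
    }
    if (i < out.size())
        out[i] = dist(rng1);  /// odd-length tail
}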

View File

@ -34,6 +34,164 @@ namespace ErrorCodes
namespace
{
class DateDiffImpl
{
public:
using ColumnDateTime64 = ColumnDecimal<DateTime64>;
explicit DateDiffImpl(const String & name_) : name(name_) {}
template <typename Transform>
void dispatchForColumns(
const IColumn & x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&x))
dispatchForSecondColumn<Transform>(*x_vec_16, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32_s, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&x))
dispatchForSecondColumn<Transform>(*x_vec_64, y, timezone_x, timezone_y, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), y, timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
}
template <typename Transform, typename LeftColumnType>
void dispatchForSecondColumn(
const LeftColumnType & x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&y))
vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
}
template <typename Transform, typename T1>
void dispatchConstForSecondColumn(
T1 x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
name);
}
template <typename Transform, typename LeftColumnType, typename RightColumnType>
void vectorVector(
const LeftColumnType & x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
}
template <typename Transform, typename LeftColumnType, typename T2>
void vectorConstant(
const LeftColumnType & x, T2 y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto y_value = stripDecimalFieldValue(y);
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y);
}
template <typename Transform, typename T1, typename RightColumnType>
void constantVector(
T1 x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto x_value = stripDecimalFieldValue(x);
for (size_t i = 0, size = y.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y);
}
template <typename TransformX, typename TransformY, typename T1, typename T2>
Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
{
return static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x));
}
template <typename T>
static UInt32 getScale(const T & v)
{
if constexpr (std::is_same_v<T, ColumnDateTime64>)
return v.getScale();
else if constexpr (std::is_same_v<T, DecimalField<DateTime64>>)
return v.getScale();
return 0;
}
template <typename T>
static auto stripDecimalFieldValue(T && v)
{
if constexpr (std::is_same_v<std::decay_t<T>, DecimalField<DateTime64>>)
return v.getValue();
else
return v;
}
private:
String name;
};
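
The calculate step is a plain difference of relative unit numbers, so crossing a unit boundary counts as one full unit even when the two instants are close. A minimal illustration for the month case (illustrative helper, not the ClickHouse transform):

#include <cassert>

/// Relative month number in the spirit of ToRelativeMonthNumImpl.
long long relativeMonthNum(long long year, long long month)
{
    return year * 12 + month;
}

int main()
{
    /// dateDiff('month', '2022-01-31', '2022-02-01') counts the boundary crossing
    /// as a whole month, although the instants are one day apart.
    assert(relativeMonthNum(2022, 2) - relativeMonthNum(2022, 1) == 1);
}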
/** dateDiff('unit', t1, t2, [timezone])
* t1 and t2 can be Date, Date32, DateTime or DateTime64
*
@ -112,175 +270,89 @@ public:
const auto & timezone_y = extractTimeZoneFromFunctionArguments(arguments, 3, 2);
if (unit == "year" || unit == "yy" || unit == "yyyy")
dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeYearNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "quarter" || unit == "qq" || unit == "q")
dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "month" || unit == "mm" || unit == "m")
dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeMonthNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "week" || unit == "wk" || unit == "ww")
dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeWeekNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "day" || unit == "dd" || unit == "d")
dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeDayNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "hour" || unit == "hh" || unit == "h")
dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeHourNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "minute" || unit == "mi" || unit == "n")
dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else if (unit == "second" || unit == "ss" || unit == "s")
dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
else
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} does not support '{}' unit", getName(), unit);
return res;
}
private:
template <typename Transform>
void dispatchForColumns(
const IColumn & x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
DateDiffImpl impl{name};
};
/** TimeDiff(t1, t2)
* t1 and t2 can be Date, Date32, DateTime or DateTime64
*/
class FunctionTimeDiff : public IFunction
{
using ColumnDateTime64 = ColumnDecimal<DateTime64>;
public:
static constexpr auto name = "TimeDiff";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTimeDiff>(); }
String getName() const override
{
if (const auto * x_vec_16 = checkAndGetColumn<ColumnDate>(&x))
dispatchForSecondColumn<Transform>(*x_vec_16, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32 = checkAndGetColumn<ColumnDateTime>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_32_s = checkAndGetColumn<ColumnDate32>(&x))
dispatchForSecondColumn<Transform>(*x_vec_32_s, y, timezone_x, timezone_y, result);
else if (const auto * x_vec_64 = checkAndGetColumn<ColumnDateTime64>(&x))
dispatchForSecondColumn<Transform>(*x_vec_64, y, timezone_x, timezone_y, result);
else if (const auto * x_const_16 = checkAndGetColumnConst<ColumnDate>(&x))
dispatchConstForSecondColumn<Transform>(x_const_16->getValue<UInt16>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32 = checkAndGetColumnConst<ColumnDateTime>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32->getValue<UInt32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_32_s = checkAndGetColumnConst<ColumnDate32>(&x))
dispatchConstForSecondColumn<Transform>(x_const_32_s->getValue<Int32>(), y, timezone_x, timezone_y, result);
else if (const auto * x_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&x))
dispatchConstForSecondColumn<Transform>(x_const_64->getValue<DecimalField<DateTime64>>(), y, timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for first argument of function {}, must be Date, Date32, DateTime or DateTime64",
return name;
}
bool isVariadic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 2",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument for function {} must be Date, Date32, DateTime or DateTime64",
getName());
if (!isDate(arguments[1]) && !isDate32(arguments[1]) && !isDateTime(arguments[1]) && !isDateTime64(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be Date, Date32, DateTime or DateTime64",
getName()
);
return std::make_shared<DataTypeInt64>();
}
template <typename Transform, typename LeftColumnType>
void dispatchForSecondColumn(
const LeftColumnType & x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
vectorVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
vectorVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
vectorVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
vectorVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else if (const auto * y_const_16 = checkAndGetColumnConst<ColumnDate>(&y))
vectorConstant<Transform>(x, y_const_16->getValue<UInt16>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32 = checkAndGetColumnConst<ColumnDateTime>(&y))
vectorConstant<Transform>(x, y_const_32->getValue<UInt32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_32_s = checkAndGetColumnConst<ColumnDate32>(&y))
vectorConstant<Transform>(x, y_const_32_s->getValue<Int32>(), timezone_x, timezone_y, result);
else if (const auto * y_const_64 = checkAndGetColumnConst<ColumnDateTime64>(&y))
vectorConstant<Transform>(x, y_const_64->getValue<DecimalField<DateTime64>>(), timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
getName());
}
template <typename Transform, typename T1>
void dispatchConstForSecondColumn(
T1 x, const IColumn & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
if (const auto * y_vec_16 = checkAndGetColumn<ColumnDate>(&y))
constantVector<Transform>(x, *y_vec_16, timezone_x, timezone_y, result);
else if (const auto * y_vec_32 = checkAndGetColumn<ColumnDateTime>(&y))
constantVector<Transform>(x, *y_vec_32, timezone_x, timezone_y, result);
else if (const auto * y_vec_32_s = checkAndGetColumn<ColumnDate32>(&y))
constantVector<Transform>(x, *y_vec_32_s, timezone_x, timezone_y, result);
else if (const auto * y_vec_64 = checkAndGetColumn<ColumnDateTime64>(&y))
constantVector<Transform>(x, *y_vec_64, timezone_x, timezone_y, result);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column for second argument of function {}, must be Date, Date32, DateTime or DateTime64",
getName());
}
template <typename Transform, typename LeftColumnType, typename RightColumnType>
void vectorVector(
const LeftColumnType & x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_data[i], timezone_x, timezone_y);
}
template <typename Transform, typename LeftColumnType, typename T2>
void vectorConstant(
const LeftColumnType & x, T2 y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & x_data = x.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto y_value = stripDecimalFieldValue(y);
for (size_t i = 0, size = x.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_data[i], y_value, timezone_x, timezone_y);
}
template <typename Transform, typename T1, typename RightColumnType>
void constantVector(
T1 x, const RightColumnType & y,
const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y,
ColumnInt64::Container & result) const
{
const auto & y_data = y.getData();
const auto transform_x = TransformDateTime64<Transform>(getScale(x));
const auto transform_y = TransformDateTime64<Transform>(getScale(y));
const auto x_value = stripDecimalFieldValue(x);
for (size_t i = 0, size = y.size(); i < size; ++i)
result[i] = calculate(transform_x, transform_y, x_value, y_data[i], timezone_x, timezone_y);
}
template <typename TransformX, typename TransformY, typename T1, typename T2>
Int64 calculate(const TransformX & transform_x, const TransformY & transform_y, T1 x, T2 y, const DateLUTImpl & timezone_x, const DateLUTImpl & timezone_y) const
{
return static_cast<Int64>(transform_y.execute(y, timezone_y))
- static_cast<Int64>(transform_x.execute(x, timezone_x));
}
template <typename T>
static UInt32 getScale(const T & v)
{
if constexpr (std::is_same_v<T, ColumnDateTime64>)
return v.getScale();
else if constexpr (std::is_same_v<T, DecimalField<DateTime64>>)
return v.getScale();
return 0;
}
template <typename T>
static auto stripDecimalFieldValue(T && v)
{
if constexpr (std::is_same_v<std::decay_t<T>, DecimalField<DateTime64>>)
return v.getValue();
else
            return v;
    }
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
    {
        const IColumn & x = *arguments[0].column;
        const IColumn & y = *arguments[1].column;
        auto res = ColumnInt64::create(input_rows_count);
impl.dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, DateLUT::instance(), DateLUT::instance(), res->getData());
return res;
}
private:
DateDiffImpl impl{name};
};
}
@ -290,4 +362,18 @@ REGISTER_FUNCTION(DateDiff)
factory.registerFunction<FunctionDateDiff>({}, FunctionFactory::CaseInsensitive);
}
REGISTER_FUNCTION(TimeDiff)
{
factory.registerFunction<FunctionTimeDiff>({R"(
Returns the difference between two dates or dates-with-time values, measured in seconds (see toRelativeSecondNum).
It is the same as `dateDiff` and was added only for MySQL compatibility. `dateDiff` is preferred.
Example:
[example:typical]
)",
Documentation::Examples{
{"typical", "SELECT timeDiff(UTCTimestamp(), now());"}},
Documentation::Categories{"Dates and Times"}}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -904,6 +904,7 @@ private:
if (cond_col)
{
arg_else_column = arg_else_column->convertToFullColumnIfConst();
auto result_column = IColumn::mutate(std::move(arg_else_column));
if (else_is_short)
result_column->expand(cond_col->getData(), true);
@ -941,6 +942,7 @@ private:
if (cond_col)
{
arg_then_column = arg_then_column->convertToFullColumnIfConst();
auto result_column = IColumn::mutate(std::move(arg_then_column));
if (then_is_short)
result_column->expand(cond_col->getData(), false);

View File

@ -133,6 +133,7 @@ struct ModuloLegacyByConstantImpl : ModuloByConstantImpl<A, B>
{
using Op = ModuloLegacyImpl<A, B>;
};
}
/** Specializations are specified for dividing numbers of the type UInt64 and UInt32 by the numbers of the same sign.
@ -179,4 +180,22 @@ REGISTER_FUNCTION(ModuloLegacy)
factory.registerFunction<FunctionModuloLegacy>();
}
struct NamePositiveModulo
{
static constexpr auto name = "positive_modulo";
};
using FunctionPositiveModulo = BinaryArithmeticOverloadResolver<PositiveModuloImpl, NamePositiveModulo, false>;
REGISTER_FUNCTION(PositiveModulo)
{
factory.registerFunction<FunctionPositiveModulo>(
{
R"(
Calculates the remainder when dividing `a` by `b`. Similar to the function `modulo`, except that `positive_modulo` always returns a non-negative number.
)",
Documentation::Examples{{"positive_modulo", "SELECT positive_modulo(-1000, 32);"}},
Documentation::Categories{"Arithmetic"}},
FunctionFactory::CaseInsensitive);
}
}
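For reference, the semantics registered above can be reproduced with ordinary integer arithmetic. A minimal C++ sketch — the helper name `positiveModulo` is ours, not part of the codebase:
#include <cassert>
/// Sketch of an always-non-negative remainder, matching positive_modulo.
long long positiveModulo(long long a, long long b)
{
    long long r = a % b;       /// C++ '%' keeps the sign of the dividend.
    return r < 0 ? r + b : r;  /// Shift negative remainders into [0, b).
}
int main()
{
    assert(-1000 % 32 == -8);                 /// plain modulo is negative here
    assert(positiveModulo(-1000, 32) == 24);  /// matches SELECT positive_modulo(-1000, 32)
}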

View File

@ -964,15 +964,16 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
components.whole = components.whole / common::exp10_i32(scale);
}
bool is_ok = true;
if constexpr (std::is_same_v<ReturnType, void>)
datetime64 = DecimalUtils::decimalFromComponents<DateTime64>(components, scale);
else
DecimalUtils::tryGetDecimalFromComponents<DateTime64>(components, scale, datetime64);
is_ok = DecimalUtils::tryGetDecimalFromComponents<DateTime64>(components, scale, datetime64);
datetime64 *= negative_multiplier;
return ReturnType(true);
return ReturnType(is_ok);
}
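The conversion above turns the (whole, fractional) components into a single fixed-point DateTime64 value. A minimal sketch of that composition, with names of our own choosing; the try-variant in the real code additionally reports overflow via a bool, which is what feeds `is_ok`:
#include <cassert>
#include <cstdint>
/// Sketch of composing a fixed-point decimal from components at a given scale,
/// as DecimalUtils::decimalFromComponents does; overflow handling is omitted.
int64_t decimalFromComponentsSketch(int64_t whole, int64_t fractional, uint32_t scale)
{
    int64_t multiplier = 1;
    for (uint32_t i = 0; i < scale; ++i)
        multiplier *= 10;
    return whole * multiplier + fractional;
}
int main()
{
    /// 1668470400 whole seconds and 123 milliseconds at scale 3.
    assert(decimalFromComponentsSketch(1668470400, 123, 3) == 1668470400123LL);
}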
inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())

View File

@ -659,6 +659,9 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf
fractional *= common::exp10_i64(scale - subsecond.digits);
}
if constexpr (std::is_same_v<ReturnType, bool>)
return DecimalUtils::tryGetDecimalFromComponents<DateTime64>(whole, fractional, scale, res);
res = DecimalUtils::decimalFromComponents<DateTime64>(whole, fractional, scale);
return ReturnType(true);
}
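The `fractional *= common::exp10_i64(scale - subsecond.digits)` line above rescales however many subsecond digits were actually parsed up to the target scale. A small self-contained sketch of that arithmetic (names are ours):
#include <cassert>
#include <cstdint>
/// Pad parsed subsecond digits with trailing zeros up to the target scale,
/// mirroring `fractional *= common::exp10_i64(scale - subsecond.digits)`.
int64_t rescaleFractional(int64_t fractional, uint32_t parsed_digits, uint32_t scale)
{
    for (uint32_t i = parsed_digits; i < scale; ++i)
        fractional *= 10;
    return fractional;
}
int main()
{
    /// ".12" parsed with target scale 3: 12 -> 120 milliseconds.
    assert(rescaleFractional(12, 2, 3) == 120);
}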

View File

@ -1169,6 +1169,17 @@ ActionsDAGPtr ActionsDAG::makeAddingColumnActions(ColumnWithTypeAndName column)
ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
{
first.mergeInplace(std::move(second));
/// Drop unused inputs and, probably, some actions.
first.removeUnusedActions();
return std::make_shared<ActionsDAG>(std::move(first));
}
void ActionsDAG::mergeInplace(ActionsDAG && second)
{
auto & first = *this;
/// first: x (1), x (2), y ==> x (2), z, x (3)
/// second: x (1), x (2), x (3) ==> x (3), x (2), x (1)
/// merge: x (1), x (2), x (3), y =(first)=> x (2), z, x (4), x (3) =(second)=> x (3), x (4), x (2), z
@ -1256,11 +1267,6 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
first.nodes.splice(first.nodes.end(), std::move(second.nodes));
first.projected_output = second.projected_output;
/// Drop unused inputs and, probably, some actions.
first.removeUnusedActions();
return std::make_shared<ActionsDAG>(std::move(first));
}
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const

View File

@ -273,6 +273,11 @@ public:
/// Otherwise, any two actions may be combined.
static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second);
/// The result is similar to merge(*this, second);
/// Invariant : no nodes are removed from the first (this) DAG.
/// So that pointers to nodes are kept valid.
void mergeInplace(ActionsDAG && second);
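    The invariant holds because merging splices the second DAG's node list into the first (see `first.nodes.splice(...)` above), and `std::list::splice` never copies or reallocates elements. A minimal self-contained sketch of that property, using a plain std::list rather than the actual ActionsDAG types:
    #include <cassert>
    #include <list>
    int main()
    {
        std::list<int> first{1, 2};
        std::list<int> second{3, 4};
        /// Keep a pointer into 'first' before the merge, like query plan
        /// optimizations keep pointers to ActionsDAG nodes.
        const int * node = &first.front();
        /// splice moves elements without copying or reallocating them, which
        /// is why ActionsDAG::mergeInplace keeps node pointers valid.
        first.splice(first.end(), second);
        assert(*node == 1);        /// the old pointer still points at the same element
        assert(first.size() == 4); /// the second list's nodes are now owned by 'first'
    }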
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.

View File

@ -47,7 +47,7 @@ void AsynchronousMetricLog::addValues(const AsynchronousMetricValues & values)
for (const auto & [key, value] : values)
{
element.metric_name = key;
element.value = round(value * precision) / precision;
element.value = round(value.value * precision) / precision;
add(element);
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Interpreters/SystemLog.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <Core/NamesAndTypes.h>
@ -14,12 +15,8 @@
namespace DB
{
using AsynchronousMetricValue = double;
using AsynchronousMetricValues = std::unordered_map<std::string, AsynchronousMetricValue>;
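The replacement type is in the suppressed portion of this diff; judging by the usages elsewhere (`value.value`, brace-initialization with a number and a description string), it is a small struct pairing the numeric value with its documentation. A hedged reconstruction — the constructor shape is inferred, not quoted:
/// Hedged reconstruction of the new value type: a metric value plus its docs.
struct AsynchronousMetricValue
{
    double value;
    const char * documentation;
    template <typename T>
    AsynchronousMetricValue(T value_, const char * documentation_)
        : value(static_cast<double>(value_)), documentation(documentation_)
    {
    }
    AsynchronousMetricValue() = default; /// For std::unordered_map::operator[].
};
using AsynchronousMetricValues = std::unordered_map<std::string, AsynchronousMetricValue>;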
/** AsynchronousMetricLog is a log of metric values measured at regular time interval.
*/
struct AsynchronousMetricLogElement
{
UInt16 event_date;

View File

@ -15,7 +15,6 @@
#include <Common/getCurrentProcessFDCount.h>
#include <Common/getMaxFileDescriptorCount.h>
#include <Interpreters/Cache/FileCache.h>
#include <Server/ProtocolServerAdapter.h>
#include <Storages/MarkCache.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
@ -24,15 +23,16 @@
#include <IO/MMappedFileCache.h>
#include <IO/ReadHelpers.h>
#include <Databases/IDatabase.h>
#include <base/errnoToString.h>
#include <chrono>
#include "config.h"
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
namespace DB
{
@ -123,9 +123,9 @@ void AsynchronousMetrics::openSensors()
{
LOG_WARNING(
&Poco::Logger::get("AsynchronousMetrics"),
"Thermal monitor '{}' exists but could not be read, error {}.",
"Thermal monitor '{}' exists but could not be read: {}.",
thermal_device_index,
e.getErrno());
errnoToString(e.getErrno()));
continue;
}
@ -252,10 +252,10 @@ void AsynchronousMetrics::openSensorsChips()
{
LOG_WARNING(
&Poco::Logger::get("AsynchronousMetrics"),
"Hardware monitor '{}', sensor '{}' exists but could not be read, error {}.",
"Hardware monitor '{}', sensor '{}' exists but could not be read: {}.",
hwmon_name,
sensor_name,
e.getErrno());
errnoToString(e.getErrno()));
continue;
}
@ -386,14 +386,15 @@ uint64_t updateJemallocEpoch()
}
template <typename Value>
static Value saveJemallocMetricImpl(AsynchronousMetricValues & values,
static Value saveJemallocMetricImpl(
AsynchronousMetricValues & values,
const std::string & jemalloc_full_name,
const std::string & clickhouse_full_name)
{
Value value{};
size_t size = sizeof(value);
mallctl(jemalloc_full_name.c_str(), &value, &size, nullptr, 0);
values[clickhouse_full_name] = value;
values[clickhouse_full_name] = AsynchronousMetricValue(value, "An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html");
return value;
}
@ -570,85 +571,93 @@ void AsynchronousMetrics::update(TimePoint update_time)
previous_update_time = update_time;
/// This is also a good indicator of system responsiveness.
new_values["Jitter"] = std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9;
new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
"The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up."
" A proxy-indicator of overall system latency and responsiveness." };
if (auto mark_cache = getContext()->getMarkCache())
{
if (auto mark_cache = getContext()->getMarkCache())
{
new_values["MarkCacheBytes"] = mark_cache->weight();
new_values["MarkCacheFiles"] = mark_cache->count();
}
new_values["MarkCacheBytes"] = { mark_cache->weight(), "Total size of mark cache in bytes" };
new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" };
}
if (auto uncompressed_cache = getContext()->getUncompressedCache())
{
if (auto uncompressed_cache = getContext()->getUncompressedCache())
{
new_values["UncompressedCacheBytes"] = uncompressed_cache->weight();
new_values["UncompressedCacheCells"] = uncompressed_cache->count();
}
new_values["UncompressedCacheBytes"] = { uncompressed_cache->weight(),
"Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided." };
new_values["UncompressedCacheCells"] = { uncompressed_cache->count(),
"Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." };
}
if (auto index_mark_cache = getContext()->getIndexMarkCache())
{
if (auto index_mark_cache = getContext()->getIndexMarkCache())
{
new_values["IndexMarkCacheBytes"] = index_mark_cache->weight();
new_values["IndexMarkCacheFiles"] = index_mark_cache->count();
}
new_values["IndexMarkCacheBytes"] = { index_mark_cache->weight(), "Total size of mark cache for secondary indices in bytes." };
new_values["IndexMarkCacheFiles"] = { index_mark_cache->count(), "Total number of mark files cached in the mark cache for secondary indices." };
}
if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache())
{
if (auto index_uncompressed_cache = getContext()->getIndexUncompressedCache())
{
new_values["IndexUncompressedCacheBytes"] = index_uncompressed_cache->weight();
new_values["IndexUncompressedCacheCells"] = index_uncompressed_cache->count();
}
new_values["IndexUncompressedCacheBytes"] = { index_uncompressed_cache->weight(),
"Total size of uncompressed cache in bytes for secondary indices. Uncompressed cache does not usually improve the performance and should be mostly avoided." };
new_values["IndexUncompressedCacheCells"] = { index_uncompressed_cache->count(),
"Total number of entries in the uncompressed cache for secondary indices. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided." };
}
if (auto mmap_cache = getContext()->getMMappedFileCache())
{
if (auto mmap_cache = getContext()->getMMappedFileCache())
{
new_values["MMapCacheCells"] = mmap_cache->count();
}
new_values["MMapCacheCells"] = { mmap_cache->count(),
"The number of files opened with `mmap` (mapped in memory)."
" This is used for queries with the setting `local_filesystem_read_method` set to `mmap`."
" The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."};
}
{
auto caches = FileCacheFactory::instance().getAll();
size_t total_bytes = 0;
size_t total_files = 0;
for (const auto & [_, cache_data] : caches)
{
new_values["FilesystemCacheBytes"] = cache_data->cache->getUsedCacheSize();
new_values["FilesystemCacheFiles"] = cache_data->cache->getFileSegmentsNum();
total_bytes += cache_data->cache->getUsedCacheSize();
total_files += cache_data->cache->getFileSegmentsNum();
}
new_values["FilesystemCacheBytes"] = { total_bytes,
"Total bytes in the `cache` virtual filesystem. This cache is hold on disk." };
new_values["FilesystemCacheFiles"] = { total_files,
"Total number of cached file segments in the `cache` virtual filesystem. This cache is hold on disk." };
}
#if USE_ROCKSDB
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
{
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
{
new_values["MergeTreeMetadataCacheSize"] = metadata_cache->getEstimateNumKeys();
}
new_values["MergeTreeMetadataCacheSize"] = { metadata_cache->getEstimateNumKeys(),
"The size of the metadata cache for tables. This cache is experimental and not used in production." };
}
#endif
#if USE_EMBEDDED_COMPILER
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
{
new_values["CompiledExpressionCacheBytes"] = compiled_expression_cache->weight();
new_values["CompiledExpressionCacheCount"] = compiled_expression_cache->count();
}
new_values["CompiledExpressionCacheBytes"] = { compiled_expression_cache->weight(),
"Total bytes used for the cache of JIT-compiled code." };
new_values["CompiledExpressionCacheCount"] = { compiled_expression_cache->count(),
"Total entries in the cache of JIT-compiled code." };
}
#endif
new_values["Uptime"] = { getContext()->getUptimeSeconds(),
"The server uptime in seconds. It includes the time spent for server initialization before accepting connections." };
new_values["Uptime"] = getContext()->getUptimeSeconds();
if (const auto stats = getHashTablesCacheStatistics())
{
if (const auto stats = getHashTablesCacheStatistics())
{
new_values["HashTableStatsCacheEntries"] = stats->entries;
new_values["HashTableStatsCacheHits"] = stats->hits;
new_values["HashTableStatsCacheMisses"] = stats->misses;
}
new_values["HashTableStatsCacheEntries"] = { stats->entries,
"The number of entries in the cache of hash table sizes."
" The cache for hash table sizes is used for predictive optimization of GROUP BY." };
new_values["HashTableStatsCacheHits"] = { stats->hits,
"The number of times the prediction of a hash table size was correct." };
new_values["HashTableStatsCacheMisses"] = { stats->misses,
"The number of times the prediction of a hash table size was incorrect." };
}
#if defined(OS_LINUX) || defined(OS_FREEBSD)
@ -660,7 +669,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
auto epoch = updateJemallocEpoch();
new_values["jemalloc.epoch"] = epoch;
new_values["jemalloc.epoch"] = { epoch, "An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics." };
// Collect the statistics themselves.
saveJemallocMetric<size_t>(new_values, "allocated");
@ -685,13 +694,24 @@ void AsynchronousMetrics::update(TimePoint update_time)
{
MemoryStatisticsOS::Data & data = memory_statistics_data;
new_values["MemoryVirtual"] = data.virt;
new_values["MemoryResident"] = data.resident;
new_values["MemoryVirtual"] = { data.virt,
"The size of the virtual address space allocated by the server process, in bytes."
" The size of the virtual address space is usually much greater than the physical memory consumption, and should not be used as an estimate for the memory consumption."
" The large values of this metric are totally normal, and makes only technical sense."};
new_values["MemoryResident"] = { data.resident,
"The amount of physical memory used by the server process, in bytes." };
#if !defined(OS_FREEBSD)
new_values["MemoryShared"] = data.shared;
new_values["MemoryShared"] = { data.shared,
"The amount of memory used by the server process, that is also shared by another processes, in bytes."
" ClickHouse does not use shared memory, but some memory can be labeled by OS as shared for its own reasons."
" This metric does not make a lot of sense to watch, and it exists only for completeness reasons."};
#endif
new_values["MemoryCode"] = data.code;
new_values["MemoryDataAndStack"] = data.data_and_stack;
new_values["MemoryCode"] = { data.code,
"The amount of virtual memory mapped for the pages of machine code of the server process, in bytes." };
new_values["MemoryDataAndStack"] = { data.data_and_stack,
"The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes."
" It is unspecified whether it includes the per-thread stacks and most of the allocated memory, that is allocated with the 'mmap' system call."
" This metric exists only for completeness reasons. I recommend to use the `MemoryResident` metric for monitoring."};
/// We must update the value of total_memory_tracker periodically.
/// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount.
@ -754,11 +774,22 @@ void AsynchronousMetrics::update(TimePoint update_time)
assertChar('/', *loadavg);
readText(threads_total, *loadavg);
new_values["LoadAverage1"] = loadavg1;
new_values["LoadAverage5"] = loadavg5;
new_values["LoadAverage15"] = loadavg15;
new_values["OSThreadsRunnable"] = threads_runnable;
new_values["OSThreadsTotal"] = threads_total;
#define LOAD_AVERAGE_DOCUMENTATION \
    " The load represents the number of threads across all the processes (the scheduling entities of the OS kernel)," \
    " that are currently running on a CPU or waiting for IO, or ready to run but not being scheduled at this point in time." \
    " This number includes all the processes, not only clickhouse-server. The number can be greater than the number of CPU cores," \
    " if the system is overloaded, and many processes are ready to run but waiting for CPU or IO."
new_values["LoadAverage1"] = { loadavg1,
"The whole system load, averaged with exponential smoothing over 1 minute." LOAD_AVERAGE_DOCUMENTATION };
new_values["LoadAverage5"] = { loadavg5,
"The whole system load, averaged with exponential smoothing over 5 minutes." LOAD_AVERAGE_DOCUMENTATION };
new_values["LoadAverage15"] = { loadavg15,
"The whole system load, averaged with exponential smoothing over 15 minutes." LOAD_AVERAGE_DOCUMENTATION };
new_values["OSThreadsRunnable"] = { threads_runnable,
"The total number of 'runnable' threads, as the OS kernel scheduler seeing it." };
new_values["OSThreadsTotal"] = { threads_total,
"The total number of threads, as the OS kernel scheduler seeing it." };
}
catch (...)
{
@ -775,7 +806,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
Float64 uptime_seconds = 0;
readText(uptime_seconds, *uptime);
new_values["OSUptime"] = uptime_seconds;
new_values["OSUptime"] = { uptime_seconds, "The uptime of the host server (the machine where ClickHouse is running), in seconds." };
}
catch (...)
{
@ -838,16 +869,43 @@ void AsynchronousMetrics::update(TimePoint update_time)
else
delta_values_all_cpus = delta_values;
new_values["OSUserTime" + cpu_suffix] = delta_values.user * multiplier;
new_values["OSNiceTime" + cpu_suffix] = delta_values.nice * multiplier;
new_values["OSSystemTime" + cpu_suffix] = delta_values.system * multiplier;
new_values["OSIdleTime" + cpu_suffix] = delta_values.idle * multiplier;
new_values["OSIOWaitTime" + cpu_suffix] = delta_values.iowait * multiplier;
new_values["OSIrqTime" + cpu_suffix] = delta_values.irq * multiplier;
new_values["OSSoftIrqTime" + cpu_suffix] = delta_values.softirq * multiplier;
new_values["OSStealTime" + cpu_suffix] = delta_values.steal * multiplier;
new_values["OSGuestTime" + cpu_suffix] = delta_values.guest * multiplier;
new_values["OSGuestNiceTime" + cpu_suffix] = delta_values.guest_nice * multiplier;
new_values["OSUserTime" + cpu_suffix] = { delta_values.user * multiplier,
"The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" This includes also the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSNiceTime" + cpu_suffix] = { delta_values.nice * multiplier,
"The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSSystemTime" + cpu_suffix] = { delta_values.system * multiplier,
"The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSIdleTime" + cpu_suffix] = { delta_values.idle * multiplier,
"The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core)."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSIOWaitTime" + cpu_suffix] = { delta_values.iowait * multiplier,
"The ratio of time the CPU core was not running the code but when the OS kernel did not run any other process on this CPU as the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSIrqTime" + cpu_suffix] = { delta_values.irq * multiplier,
"The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" A high number of this metric may indicate hardware misconfiguration or a very high network load."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSSoftIrqTime" + cpu_suffix] = { delta_values.softirq * multiplier,
"The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" A high number of this metric may indicate inefficient software running on the system."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSStealTime" + cpu_suffix] = { delta_values.steal * multiplier,
"The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" Not every virtualized environments present this metric, and most of them don't."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSGuestTime" + cpu_suffix] = { delta_values.guest * multiplier,
"The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" This metric is irrelevant for ClickHouse, but still exists for completeness."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
new_values["OSGuestNiceTime" + cpu_suffix] = { delta_values.guest_nice * multiplier,
"The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server."
" This metric is irrelevant for ClickHouse, but still exists for completeness."
" The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]."};
}
prev_values = current_values;
@ -872,14 +930,18 @@ void AsynchronousMetrics::update(TimePoint update_time)
UInt64 processes_running = 0;
readText(processes_running, *proc_stat);
skipToNextLineOrEOF(*proc_stat);
new_values["OSProcessesRunning"] = processes_running;
new_values["OSProcessesRunning"] = { processes_running,
"The number of runnable (running or ready to run) threads by the operating system."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else if (name == "procs_blocked")
{
UInt64 processes_blocked = 0;
readText(processes_blocked, *proc_stat);
skipToNextLineOrEOF(*proc_stat);
new_values["OSProcessesBlocked"] = processes_blocked;
new_values["OSProcessesBlocked"] = { processes_blocked,
"Number of threads blocked waiting for I/O to complete (`man procfs`)."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else
skipToNextLineOrEOF(*proc_stat);
@ -889,25 +951,45 @@ void AsynchronousMetrics::update(TimePoint update_time)
{
ProcStatValuesOther delta_values = current_other_values - proc_stat_values_other;
new_values["OSInterrupts"] = delta_values.interrupts;
new_values["OSContextSwitches"] = delta_values.context_switches;
new_values["OSProcessesCreated"] = delta_values.processes_created;
new_values["OSInterrupts"] = { delta_values.interrupts, "The number of interrupts on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["OSContextSwitches"] = { delta_values.context_switches, "The number of context switches that the system underwent on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["OSProcessesCreated"] = { delta_values.processes_created, "The number of processes created. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
            /// Also write values normalized to 0..1 by dividing by the number of CPUs.
/// These values are good to be averaged across the cluster of non-uniform servers.
if (num_cpus)
{
new_values["OSUserTimeNormalized"] = delta_values_all_cpus.user * multiplier / num_cpus;
new_values["OSNiceTimeNormalized"] = delta_values_all_cpus.nice * multiplier / num_cpus;
new_values["OSSystemTimeNormalized"] = delta_values_all_cpus.system * multiplier / num_cpus;
new_values["OSIdleTimeNormalized"] = delta_values_all_cpus.idle * multiplier / num_cpus;
new_values["OSIOWaitTimeNormalized"] = delta_values_all_cpus.iowait * multiplier / num_cpus;
new_values["OSIrqTimeNormalized"] = delta_values_all_cpus.irq * multiplier / num_cpus;
new_values["OSSoftIrqTimeNormalized"] = delta_values_all_cpus.softirq * multiplier / num_cpus;
new_values["OSStealTimeNormalized"] = delta_values_all_cpus.steal * multiplier / num_cpus;
new_values["OSGuestTimeNormalized"] = delta_values_all_cpus.guest * multiplier / num_cpus;
new_values["OSGuestNiceTimeNormalized"] = delta_values_all_cpus.guest_nice * multiplier / num_cpus;
new_values["OSUserTimeNormalized"] = { delta_values_all_cpus.user * multiplier / num_cpus,
"The value is similar to `OSUserTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSNiceTimeNormalized"] = { delta_values_all_cpus.nice * multiplier / num_cpus,
"The value is similar to `OSNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSSystemTimeNormalized"] = { delta_values_all_cpus.system * multiplier / num_cpus,
"The value is similar to `OSSystemTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSIdleTimeNormalized"] = { delta_values_all_cpus.idle * multiplier / num_cpus,
"The value is similar to `OSIdleTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSIOWaitTimeNormalized"] = { delta_values_all_cpus.iowait * multiplier / num_cpus,
"The value is similar to `OSIOWaitTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSIrqTimeNormalized"] = { delta_values_all_cpus.irq * multiplier / num_cpus,
"The value is similar to `OSIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSSoftIrqTimeNormalized"] = { delta_values_all_cpus.softirq * multiplier / num_cpus,
"The value is similar to `OSSoftIrqTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSStealTimeNormalized"] = { delta_values_all_cpus.steal * multiplier / num_cpus,
"The value is similar to `OSStealTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSGuestTimeNormalized"] = { delta_values_all_cpus.guest * multiplier / num_cpus,
"The value is similar to `OSGuestTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
new_values["OSGuestNiceTimeNormalized"] = { delta_values_all_cpus.guest_nice * multiplier / num_cpus,
"The value is similar to `OSGuestNiceTime` but divided to the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores."
" This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric."};
}
}
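The point of the normalization above is that per-core ratios become directly comparable across machines with different core counts. A tiny self-contained sketch of the reasoning:
#include <cassert>
int main()
{
    /// Server A: 8 cores, 4 cores' worth of user time  -> 0.5 normalized.
    /// Server B: 64 cores, 32 cores' worth of user time -> 0.5 normalized.
    double a = 4.0 / 8;
    double b = 32.0 / 64;
    /// Averaging the normalized values is meaningful even though the servers
    /// are non-uniform; averaging the raw sums (4 and 32) would not be.
    double cluster_average = (a + b) / 2;
    assert(cluster_average == 0.5);
}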
@ -962,39 +1044,47 @@ void AsynchronousMetrics::update(TimePoint update_time)
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
new_values["OSMemoryTotal"] = { bytes, "The total amount of memory on the host system, in bytes." };
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
new_values["OSMemoryFreeWithoutCached"] = { bytes,
"The amount of free memory on the host system, in bytes."
" This does not include the memory used by the OS page cache memory, in bytes."
" The page cache memory is also available for usage by programs, so the value of this metric can be confusing."
" See the `OSMemoryAvailable` metric instead."
" For convenience we also provide the `OSMemoryFreePlusCached` metric, that should be somewhat similar to OSMemoryAvailable."
" See also https://www.linuxatemyram.com/."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
new_values["OSMemoryAvailable"] = { bytes, "The amount of memory available to be used by programs, in bytes. This is very similar to the `OSMemoryFreePlusCached` metric."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
new_values["OSMemoryBuffers"] = { bytes, "The amount of memory used by OS kernel buffers, in bytes. This should be typically small, and large values may indicate a misconfiguration of the OS."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
new_values["OSMemoryCached"] = { bytes, "The amount of memory used by the OS page cache, in bytes. Typically, almost all available memory is used by the OS page cache - high values of this metric are normal and expected."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
new_values["OSMemorySwapCached"] = { bytes, "The amount of memory in swap that was also loaded in RAM. Swap should be disabled on production systems. If the value of this metric is large, it indicates a misconfiguration."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
skipToNextLineOrEOF(*meminfo);
}
new_values["OSMemoryFreePlusCached"] = free_plus_cached_bytes;
new_values["OSMemoryFreePlusCached"] = { free_plus_cached_bytes, "The amount of free memory plus OS page cache memory on the host system, in bytes. This memory is available to be used by programs. The value should be very similar to `OSMemoryAvailable`."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
catch (...)
{
@ -1043,7 +1133,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
if (auto colon = s.find_first_of(':'))
{
auto mhz = std::stod(s.substr(colon + 2));
new_values[fmt::format("CPUFrequencyMHz_{}", core_id)] = mhz;
new_values[fmt::format("CPUFrequencyMHz_{}", core_id)] = { mhz, "The current frequency of the CPU, in MHz. Most of the modern CPUs adjust the frequency dynamically for power saving and Turbo Boosting." };
}
}
}
@ -1062,7 +1152,8 @@ void AsynchronousMetrics::update(TimePoint update_time)
uint64_t open_files = 0;
readText(open_files, *file_nr);
new_values["OSOpenFiles"] = open_files;
new_values["OSOpenFiles"] = { open_files, "The total number of opened files on the host machine."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
catch (...)
{
@ -1083,7 +1174,17 @@ void AsynchronousMetrics::update(TimePoint update_time)
BlockDeviceStatValues current_values{};
BlockDeviceStatValues & prev_values = block_device_stats[name];
current_values.read(*device);
try
{
current_values.read(*device);
}
catch (const ErrnoException & e)
{
LOG_DEBUG(log, "Cannot read statistics about the block device '{}': {}.",
name, errnoToString(e.getErrno()));
continue;
}
BlockDeviceStatValues delta_values = current_values - prev_values;
prev_values = current_values;
@ -1097,42 +1198,89 @@ void AsynchronousMetrics::update(TimePoint update_time)
/// Always in milliseconds according to the docs.
static constexpr double time_multiplier = 1e-6;
new_values["BlockReadOps_" + name] = delta_values.read_ios;
new_values["BlockWriteOps_" + name] = delta_values.write_ios;
new_values["BlockDiscardOps_" + name] = delta_values.discard_ops;
#define BLOCK_DEVICE_EXPLANATION \
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." \
" Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt"
new_values["BlockReadMerges_" + name] = delta_values.read_merges;
new_values["BlockWriteMerges_" + name] = delta_values.write_merges;
new_values["BlockDiscardMerges_" + name] = delta_values.discard_merges;
new_values["BlockReadOps_" + name] = { delta_values.read_ios,
"Number of read operations requested from the block device."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockWriteOps_" + name] = { delta_values.write_ios,
"Number of write operations requested from the block device."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockDiscardOps_" + name] = { delta_values.discard_ops,
"Number of discard operations requested from the block device. These operations are relevant for SSD."
" Discard operations are not used by ClickHouse, but can be used by other processes on the system."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockReadBytes_" + name] = delta_values.read_sectors * sector_size;
new_values["BlockWriteBytes_" + name] = delta_values.write_sectors * sector_size;
new_values["BlockDiscardBytes_" + name] = delta_values.discard_sectors * sector_size;
new_values["BlockReadMerges_" + name] = { delta_values.read_merges,
"Number of read operations requested from the block device and merged together by the OS IO scheduler."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockWriteMerges_" + name] = { delta_values.write_merges,
"Number of write operations requested from the block device and merged together by the OS IO scheduler."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockDiscardMerges_" + name] = { delta_values.discard_merges,
"Number of discard operations requested from the block device and merged together by the OS IO scheduler."
" These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockReadTime_" + name] = delta_values.read_ticks * time_multiplier;
new_values["BlockWriteTime_" + name] = delta_values.write_ticks * time_multiplier;
new_values["BlockDiscardTime_" + name] = delta_values.discard_ticks * time_multiplier;
new_values["BlockReadBytes_" + name] = { delta_values.read_sectors * sector_size,
"Number of bytes read from the block device."
" It can be lower than the number of bytes read from the filesystem due to the usage of the OS page cache, that saves IO."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockWriteBytes_" + name] = { delta_values.write_sectors * sector_size,
"Number of bytes written to the block device."
" It can be lower than the number of bytes written to the filesystem due to the usage of the OS page cache, that saves IO."
" A write to the block device may happen later than the corresponding write to the filesystem due to write-through caching."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockDiscardBytes_" + name] = { delta_values.discard_sectors * sector_size,
"Number of discarded bytes on the block device."
" These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockInFlightOps_" + name] = delta_values.in_flight_ios;
new_values["BlockReadTime_" + name] = { delta_values.read_ticks * time_multiplier,
"Time in seconds spend in read operations requested from the block device, summed across all the operations."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockWriteTime_" + name] = { delta_values.write_ticks * time_multiplier,
"Time in seconds spend in write operations requested from the block device, summed across all the operations."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockDiscardTime_" + name] = { delta_values.discard_ticks * time_multiplier,
"Time in seconds spend in discard operations requested from the block device, summed across all the operations."
" These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockActiveTime_" + name] = delta_values.io_ticks * time_multiplier;
new_values["BlockQueueTime_" + name] = delta_values.time_in_queue * time_multiplier;
new_values["BlockInFlightOps_" + name] = { delta_values.in_flight_ios,
"This value counts the number of I/O requests that have been issued to"
" the device driver but have not yet completed. It does not include IO"
" requests that are in the queue but not yet issued to the device driver."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockActiveTime_" + name] = { delta_values.io_ticks * time_multiplier,
"Time in seconds the block device had the IO requests queued."
BLOCK_DEVICE_EXPLANATION };
new_values["BlockQueueTime_" + name] = { delta_values.time_in_queue * time_multiplier,
"This value counts the number of milliseconds that IO requests have waited"
" on this block device. If there are multiple IO requests waiting, this"
" value will increase as the product of the number of milliseconds times the"
" number of requests waiting."
BLOCK_DEVICE_EXPLANATION };
if (delta_values.in_flight_ios)
{
/// TODO Check if these values are meaningful.
new_values["BlockActiveTimePerOp_" + name] = delta_values.io_ticks * time_multiplier / delta_values.in_flight_ios;
new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
new_values["BlockActiveTimePerOp_" + name] = { delta_values.io_ticks * time_multiplier / delta_values.in_flight_ios,
"Similar to the `BlockActiveTime` metrics, but the value is divided to the number of IO operations to count the per-operation time." };
new_values["BlockQueueTimePerOp_" + name] = { delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios,
"Similar to the `BlockQueueTime` metrics, but the value is divided to the number of IO operations to count the per-operation time." };
}
}
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
LOG_DEBUG(log, "Cannot read statistics from block devices: {}", getCurrentExceptionMessage(false));
/// Try to reopen block devices in case of error
/// (i.e. ENOENT means that some disk had been replaced, and it may apperas with a new name)
/// (i.e. ENOENT or ENODEV means that some disk had been replaced, and it may appear with a new name)
try
{
openBlockDevices();
@ -1211,15 +1359,31 @@ void AsynchronousMetrics::update(TimePoint update_time)
if (!first_run)
{
new_values["NetworkReceiveBytes_" + interface_name] = delta_values.recv_bytes;
new_values["NetworkReceivePackets_" + interface_name] = delta_values.recv_packets;
new_values["NetworkReceiveErrors_" + interface_name] = delta_values.recv_errors;
new_values["NetworkReceiveDrop_" + interface_name] = delta_values.recv_drop;
new_values["NetworkReceiveBytes_" + interface_name] = { delta_values.recv_bytes,
" Number of bytes received via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkReceivePackets_" + interface_name] = { delta_values.recv_packets,
" Number of network packets received via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkReceiveErrors_" + interface_name] = { delta_values.recv_errors,
" Number of times error happened receiving via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkReceiveDrop_" + interface_name] = { delta_values.recv_drop,
" Number of bytes a packet was dropped while received via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkSendBytes_" + interface_name] = delta_values.send_bytes;
new_values["NetworkSendPackets_" + interface_name] = delta_values.send_packets;
new_values["NetworkSendErrors_" + interface_name] = delta_values.send_errors;
new_values["NetworkSendDrop_" + interface_name] = delta_values.send_drop;
new_values["NetworkSendBytes_" + interface_name] = { delta_values.send_bytes,
" Number of bytes sent via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkSendPackets_" + interface_name] = { delta_values.send_packets,
" Number of network packets sent via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkSendErrors_" + interface_name] = { delta_values.send_errors,
" Number of times error (e.g. TCP retransmit) happened while sending via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
new_values["NetworkSendDrop_" + interface_name] = { delta_values.send_drop,
" Number of times a packed was dropped while sending via the network interface."
" This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server." };
}
}
}
@ -1238,7 +1402,8 @@ void AsynchronousMetrics::update(TimePoint update_time)
in.rewind();
Int64 temperature = 0;
readText(temperature, in);
new_values[fmt::format("Temperature{}", i)] = temperature * 0.001;
new_values[fmt::format("Temperature{}", i)] = { temperature * 0.001,
"The temperature of the corresponding device in ℃. A sensor can return an unrealistic value. Source: `/sys/class/thermal`" };
}
}
catch (...)
@ -1271,13 +1436,17 @@ void AsynchronousMetrics::update(TimePoint update_time)
}
catch (const ErrnoException & e)
{
LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), "Hardware monitor '{}', sensor '{}' exists but could not be read, error {}.", hwmon_name, sensor_name, e.getErrno());
LOG_DEBUG(log, "Hardware monitor '{}', sensor '{}' exists but could not be read: {}.",
hwmon_name, sensor_name, errnoToString(e.getErrno()));
continue;
}
if (sensor_name.empty())
new_values[fmt::format("Temperature_{}", hwmon_name)] = temperature * 0.001;
new_values[fmt::format("Temperature_{}", hwmon_name)] = { temperature * 0.001,
"The temperature reported by the corresponding hardware monitor in ℃. A sensor can return an unrealistic value. Source: `/sys/class/hwmon`" };
else
new_values[fmt::format("Temperature_{}_{}", hwmon_name, sensor_name)] = temperature * 0.001;
new_values[fmt::format("Temperature_{}_{}", hwmon_name, sensor_name)] = { temperature * 0.001,
"The temperature reported by the corresponding hardware monitor and the corresponding sensor in ℃. A sensor can return an unrealistic value. Source: `/sys/class/hwmon`" };
}
}
}
@ -1313,7 +1482,11 @@ void AsynchronousMetrics::update(TimePoint update_time)
in.rewind();
uint64_t errors = 0;
readText(errors, in);
new_values[fmt::format("EDAC{}_Correctable", i)] = errors;
new_values[fmt::format("EDAC{}_Correctable", i)] = { errors,
"The number of correctable ECC memory errors."
" A high number of this value indicates bad RAM which has to be immediately replaced,"
" because in presence of a high number of corrected errors, a number of silent errors may happen as well, leading to data corruption."
" Source: `/sys/devices/system/edac/mc/`" };
}
if (edac[i].second)
@ -1322,7 +1495,11 @@ void AsynchronousMetrics::update(TimePoint update_time)
in.rewind();
uint64_t errors = 0;
readText(errors, in);
new_values[fmt::format("EDAC{}_Uncorrectable", i)] = errors;
new_values[fmt::format("EDAC{}_Uncorrectable", i)] = { errors,
"The number of uncorrectable ECC memory errors."
" A non-zero number of this value indicates bad RAM which has to be immediately replaced,"
" because it indicates potential data corruption."
" Source: `/sys/devices/system/edac/mc/`" };
}
}
}
@ -1346,24 +1523,36 @@ void AsynchronousMetrics::update(TimePoint update_time)
{
auto stat = getStatVFS(getContext()->getPath());
new_values["FilesystemMainPathTotalBytes"] = stat.f_blocks * stat.f_frsize;
new_values["FilesystemMainPathAvailableBytes"] = stat.f_bavail * stat.f_frsize;
new_values["FilesystemMainPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize;
new_values["FilesystemMainPathTotalINodes"] = stat.f_files;
new_values["FilesystemMainPathAvailableINodes"] = stat.f_favail;
new_values["FilesystemMainPathUsedINodes"] = stat.f_files - stat.f_favail;
new_values["FilesystemMainPathTotalBytes"] = { stat.f_blocks * stat.f_frsize,
"The size of the volume where the main ClickHouse path is mounted, in bytes." };
new_values["FilesystemMainPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize,
"Available bytes on the volume where the main ClickHouse path is mounted." };
new_values["FilesystemMainPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize,
"Used bytes on the volume where the main ClickHouse path is mounted." };
new_values["FilesystemMainPathTotalINodes"] = { stat.f_files,
"The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration." };
new_values["FilesystemMainPathAvailableINodes"] = { stat.f_favail,
"The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full." };
new_values["FilesystemMainPathUsedINodes"] = { stat.f_files - stat.f_favail,
"The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files." };
}
{
/// Current working directory of the server is the directory with logs.
auto stat = getStatVFS(".");
new_values["FilesystemLogsPathTotalBytes"] = stat.f_blocks * stat.f_frsize;
new_values["FilesystemLogsPathAvailableBytes"] = stat.f_bavail * stat.f_frsize;
new_values["FilesystemLogsPathUsedBytes"] = (stat.f_blocks - stat.f_bavail) * stat.f_frsize;
new_values["FilesystemLogsPathTotalINodes"] = stat.f_files;
new_values["FilesystemLogsPathAvailableINodes"] = stat.f_favail;
new_values["FilesystemLogsPathUsedINodes"] = stat.f_files - stat.f_favail;
new_values["FilesystemLogsPathTotalBytes"] = { stat.f_blocks * stat.f_frsize,
"The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs." };
new_values["FilesystemLogsPathAvailableBytes"] = { stat.f_bavail * stat.f_frsize,
"Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file." };
new_values["FilesystemLogsPathUsedBytes"] = { (stat.f_blocks - stat.f_bavail) * stat.f_frsize,
"Used bytes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathTotalINodes"] = { stat.f_files,
"The total number of inodes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathAvailableINodes"] = { stat.f_favail,
"The number of available inodes on the volume where ClickHouse logs path is mounted." };
new_values["FilesystemLogsPathUsedINodes"] = { stat.f_files - stat.f_favail,
"The number of used inodes on the volume where ClickHouse logs path is mounted." };
}
/// Free and total space on every configured disk.
@ -1380,10 +1569,14 @@ void AsynchronousMetrics::update(TimePoint update_time)
auto available = disk->getAvailableSpace();
auto unreserved = disk->getUnreservedSpace();
new_values[fmt::format("DiskTotal_{}", name)] = total;
new_values[fmt::format("DiskUsed_{}", name)] = total - available;
new_values[fmt::format("DiskAvailable_{}", name)] = available;
new_values[fmt::format("DiskUnreserved_{}", name)] = unreserved;
new_values[fmt::format("DiskTotal_{}", name)] = { total,
"The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." };
new_values[fmt::format("DiskUsed_{}", name)] = { total - available,
"Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." };
new_values[fmt::format("DiskAvailable_{}", name)] = { available,
"Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." };
new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved,
"Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." };
}
}
@ -1463,44 +1656,46 @@ void AsynchronousMetrics::update(TimePoint update_time)
}
}
new_values["ReplicasMaxQueueSize"] = max_queue_size;
new_values["ReplicasMaxInsertsInQueue"] = max_inserts_in_queue;
new_values["ReplicasMaxMergesInQueue"] = max_merges_in_queue;
new_values["ReplicasMaxQueueSize"] = { max_queue_size, "Maximum queue size (in the number of operations like get, merge) across Replicated tables." };
new_values["ReplicasMaxInsertsInQueue"] = { max_inserts_in_queue, "Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables." };
new_values["ReplicasMaxMergesInQueue"] = { max_merges_in_queue, "Maximum number of merge operations in the queue (still to be applied) across Replicated tables." };
new_values["ReplicasSumQueueSize"] = sum_queue_size;
new_values["ReplicasSumInsertsInQueue"] = sum_inserts_in_queue;
new_values["ReplicasSumMergesInQueue"] = sum_merges_in_queue;
new_values["ReplicasSumQueueSize"] = { sum_queue_size, "Sum queue size (in the number of operations like get, merge) across Replicated tables." };
new_values["ReplicasSumInsertsInQueue"] = { sum_inserts_in_queue, "Sum of INSERT operations in the queue (still to be replicated) across Replicated tables." };
new_values["ReplicasSumMergesInQueue"] = { sum_merges_in_queue, "Sum of merge operations in the queue (still to be applied) across Replicated tables." };
new_values["ReplicasMaxAbsoluteDelay"] = max_absolute_delay;
new_values["ReplicasMaxRelativeDelay"] = max_relative_delay;
new_values["ReplicasMaxAbsoluteDelay"] = { max_absolute_delay, "Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data." };
new_values["ReplicasMaxRelativeDelay"] = { max_relative_delay, "Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables." };
new_values["MaxPartCountForPartition"] = max_part_count_for_partition;
new_values["MaxPartCountForPartition"] = { max_part_count_for_partition, "Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading." };
new_values["NumberOfDatabases"] = number_of_databases;
new_values["NumberOfTables"] = total_number_of_tables;
new_values["NumberOfDatabases"] = { number_of_databases, "Total number of databases on the server." };
new_values["NumberOfTables"] = { total_number_of_tables, "Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables."
" The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`."};
new_values["TotalBytesOfMergeTreeTables"] = total_number_of_bytes;
new_values["TotalRowsOfMergeTreeTables"] = total_number_of_rows;
new_values["TotalPartsOfMergeTreeTables"] = total_number_of_parts;
new_values["TotalBytesOfMergeTreeTables"] = { total_number_of_bytes, "Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family." };
new_values["TotalRowsOfMergeTreeTables"] = { total_number_of_rows, "Total amount of rows (records) stored in all tables of MergeTree family." };
new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family."
" Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." };
auto get_metric_name = [](const String & name) -> const char *
auto get_metric_name_doc = [](const String & name) -> std::pair<const char *, const char *>
{
static std::map<String, const char *> metric_map =
static std::map<String, std::pair<const char *, const char *>> metric_map =
{
{"tcp_port", "TCPThreads"},
{"tcp_port_secure", "TCPSecureThreads"},
{"http_port", "HTTPThreads"},
{"https_port", "HTTPSecureThreads"},
{"interserver_http_port", "InterserverThreads"},
{"interserver_https_port", "InterserverSecureThreads"},
{"mysql_port", "MySQLThreads"},
{"postgresql_port", "PostgreSQLThreads"},
{"grpc_port", "GRPCThreads"},
{"prometheus.port", "PrometheusThreads"}
{"tcp_port", {"TCPThreads", "Number of threads in the server of the TCP protocol (without TLS)."}},
{"tcp_port_secure", {"TCPSecureThreads", "Number of threads in the server of the TCP protocol (with TLS)."}},
{"http_port", {"HTTPThreads", "Number of threads in the server of the HTTP interface (without TLS)."}},
{"https_port", {"HTTPSecureThreads", "Number of threads in the server of the HTTPS interface."}},
{"interserver_http_port", {"InterserverThreads", "Number of threads in the server of the replicas communication protocol (without TLS)."}},
{"interserver_https_port", {"InterserverSecureThreads", "Number of threads in the server of the replicas communication protocol (with TLS)."}},
{"mysql_port", {"MySQLThreads", "Number of threads in the server of the MySQL compatibility protocol."}},
{"postgresql_port", {"PostgreSQLThreads", "Number of threads in the server of the PostgreSQL compatibility protocol."}},
{"grpc_port", {"GRPCThreads", "Number of threads in the server of the GRPC protocol."}},
{"prometheus.port", {"PrometheusThreads", "Number of threads in the server of the Prometheus endpoint. Note: prometheus endpoints can be also used via the usual HTTP/HTTPs ports."}}
};
auto it = metric_map.find(name);
if (it == metric_map.end())
return nullptr;
return { nullptr, nullptr };
else
return it->second;
};
@ -1508,8 +1703,8 @@ void AsynchronousMetrics::update(TimePoint update_time)
const auto server_metrics = protocol_server_metrics_func();
for (const auto & server_metric : server_metrics)
{
if (const auto * name = get_metric_name(server_metric.port_name))
new_values[name] = server_metric.current_threads;
if (auto name_doc = get_metric_name_doc(server_metric.port_name); name_doc.first != nullptr)
new_values[name_doc.first] = { server_metric.current_threads, name_doc.second };
}
}
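
The lookup above pairs a C++17 if-with-initializer at the call site with a `{ nullptr, nullptr }` sentinel from the map. The idiom in isolation, with a trimmed map (two entries copied from the diff, everything else illustrative):

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>

// Map a config key to a (metric name, documentation) pair; {nullptr, nullptr}
// plays the role of "no such metric".
std::pair<const char *, const char *> lookupMetric(const std::string & port_name)
{
    static const std::map<std::string, std::pair<const char *, const char *>> metric_map =
    {
        {"tcp_port", {"TCPThreads", "Number of threads in the server of the TCP protocol (without TLS)."}},
        {"http_port", {"HTTPThreads", "Number of threads in the server of the HTTP interface (without TLS)."}},
    };
    auto it = metric_map.find(port_name);
    if (it == metric_map.end())
        return { nullptr, nullptr };
    return it->second;
}

int main()
{
    // C++17 if-with-initializer, mirroring the call site in the diff.
    if (auto name_doc = lookupMetric("http_port"); name_doc.first != nullptr)
        std::cout << name_doc.first << ": " << name_doc.second << '\n';
}
```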
#if USE_NURAFT
@ -1522,14 +1717,14 @@ void AsynchronousMetrics::update(TimePoint update_time)
size_t is_observer = 0;
size_t is_standalone = 0;
size_t znode_count = 0;
size_t watch_count =0;
size_t watch_count = 0;
size_t ephemerals_count = 0;
size_t approximate_data_size =0;
size_t approximate_data_size = 0;
size_t key_arena_size = 0;
size_t latest_snapshot_size =0;
size_t open_file_descriptor_count =0;
size_t max_file_descriptor_count =0;
size_t followers =0;
size_t latest_snapshot_size = 0;
size_t open_file_descriptor_count = 0;
size_t max_file_descriptor_count = 0;
size_t followers = 0;
size_t synced_followers = 0;
size_t zxid = 0;
size_t session_with_watches = 0;
@ -1570,29 +1765,29 @@ void AsynchronousMetrics::update(TimePoint update_time)
}
}
new_values["KeeperIsLeader"] = is_leader;
new_values["KeeperIsFollower"] = is_follower;
new_values["KeeperIsObserver"] = is_observer;
new_values["KeeperIsStandalone"] = is_standalone;
new_values["KeeperIsLeader"] = { is_leader, "1 if ClickHouse Keeper is a leader, 0 otherwise." };
new_values["KeeperIsFollower"] = { is_follower, "1 if ClickHouse Keeper is a follower, 0 otherwise." };
new_values["KeeperIsObserver"] = { is_observer, "1 if ClickHouse Keeper is an observer, 0 otherwise." };
new_values["KeeperIsStandalone"] = { is_standalone, "1 if ClickHouse Keeper is in a standalone mode, 0 otherwise." };
new_values["KeeperZnodeCount"] = znode_count;
new_values["KeeperWatchCount"] = watch_count;
new_values["KeeperEphemeralsCount"] = ephemerals_count;
new_values["KeeperZnodeCount"] = { znode_count, "The number of nodes (data entries) in ClickHouse Keeper." };
new_values["KeeperWatchCount"] = { watch_count, "The number of watches in ClickHouse Keeper." };
new_values["KeeperEphemeralsCount"] = { ephemerals_count, "The number of ephemeral nodes in ClickHouse Keeper." };
new_values["KeeperApproximateDataSize"] = approximate_data_size;
new_values["KeeperKeyArenaSize"] = key_arena_size;
new_values["KeeperLatestSnapshotSize"] = latest_snapshot_size;
new_values["KeeperApproximateDataSize"] = { approximate_data_size, "The approximate data size of ClickHouse Keeper, in bytes." };
new_values["KeeperKeyArenaSize"] = { key_arena_size, "The size in bytes of the memory arena for keys in ClickHouse Keeper." };
new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." };
new_values["KeeperOpenFileDescriptorCount"] = open_file_descriptor_count;
new_values["KeeperMaxFileDescriptorCount"] = max_file_descriptor_count;
new_values["KeeperOpenFileDescriptorCount"] = { open_file_descriptor_count, "The number of open file descriptors in ClickHouse Keeper." };
new_values["KeeperMaxFileDescriptorCount"] = { max_file_descriptor_count, "The maximum number of open file descriptors in ClickHouse Keeper." };
new_values["KeeperFollowers"] = followers;
new_values["KeeperSyncedFollowers"] = synced_followers;
new_values["KeeperZxid"] = zxid;
new_values["KeeperSessionWithWatches"] = session_with_watches;
new_values["KeeperPathsWatched"] = paths_watched;
new_values["KeeperSnapshotDirSize"] = snapshot_dir_size;
new_values["KeeperLogDirSize"] = log_dir_size;
new_values["KeeperFollowers"] = { followers, "The number of followers of ClickHouse Keeper." };
new_values["KeeperSyncedFollowers"] = { synced_followers, "The number of followers of ClickHouse Keeper who are also in-sync." };
new_values["KeeperZxid"] = { zxid, "The current transaction id number (zxid) in ClickHouse Keeper." };
new_values["KeeperSessionWithWatches"] = { session_with_watches, "The number of client sessions of ClickHouse Keeper having watches." };
new_values["KeeperPathsWatched"] = { paths_watched, "The number of different paths watched by the clients of ClickHouse Keeper." };
new_values["KeeperSnapshotDirSize"] = { snapshot_dir_size, "The size of the snapshots directory of ClickHouse Keeper, in bytes." };
new_values["KeeperLogDirSize"] = { log_dir_size, "The size of the logs directory of ClickHouse Keeper, in bytes." };
}
}
#endif
@ -1601,7 +1796,7 @@ void AsynchronousMetrics::update(TimePoint update_time)
/// Add more metrics as you wish.
new_values["AsynchronousMetricsCalculationTimeSpent"] = watch.elapsedSeconds();
new_values["AsynchronousMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics)." };
/// Log the new metrics.
if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog())
@ -1681,11 +1876,10 @@ void AsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_time, Tim
update_period.count(),
heavy_metric_update_period.count(),
watch.elapsedSeconds());
}
new_values["NumberOfDetachedParts"] = detached_parts_stats.count;
new_values["NumberOfDetachedByUserParts"] = detached_parts_stats.detached_by_user;
new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." };
new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." };
}
}

View File

@ -18,16 +18,25 @@
namespace Poco
{
class Logger;
class Logger;
}
namespace DB
{
class ProtocolServerAdapter;
class ReadBuffer;
using AsynchronousMetricValue = double;
struct AsynchronousMetricValue
{
double value;
const char * documentation;
template <typename T>
AsynchronousMetricValue(T value_, const char * documentation_)
: value(static_cast<double>(value_)), documentation(documentation_) {}
AsynchronousMetricValue() = default; /// For std::unordered_map::operator[].
};
using AsynchronousMetricValues = std::unordered_map<std::string, AsynchronousMetricValue>;
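
The two constructors are both load-bearing: the templated one lets any numeric type (`size_t`, `Int64`, `double`, ...) convert to `double` implicitly at the hundreds of assignment sites, and the defaulted one is what `std::unordered_map::operator[]` needs to create the slot before the assignment runs. A compilable sketch of the same shape (zero-initializers added here for safety; the authoritative definition is the one in the diff):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>

struct MetricValue
{
    double value = 0.0;
    const char * documentation = nullptr;

    // Accepts size_t, Int64, double, ... and widens to double.
    template <typename T>
    MetricValue(T value_, const char * documentation_)
        : value(static_cast<double>(value_)), documentation(documentation_) {}

    // Needed because operator[] default-constructs the mapped value.
    MetricValue() = default;
};

int main()
{
    std::unordered_map<std::string, MetricValue> values;

    std::size_t znode_count = 42;  // integral input ...
    values["KeeperZnodeCount"] = { znode_count, "The number of nodes (data entries)." };
    values["Uptime"] = { 3.5, "Uptime in seconds." };  // ... or floating-point input

    std::cout << values["KeeperZnodeCount"].value << '\n';
}
```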
struct ProtocolServerMetrics
@ -42,6 +51,9 @@ struct ProtocolServerMetrics
*
* This includes both ClickHouse-related metrics (like memory usage of ClickHouse process)
* and common OS-related metrics (like total memory usage on the server).
*
* All the values are either gauges (like the total number of tables or the current memory usage)
* or delta counters representing accumulation over the update interval.
*/
class AsynchronousMetrics : WithContext
{

View File

@ -79,6 +79,8 @@
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ASTAsterisk.h>
#include <Parsers/ASTIdentifier.h>
#include <Common/StackTrace.h>
#include <Common/Config/ConfigHelper.h>
#include <Common/Config/ConfigProcessor.h>
@ -987,10 +989,12 @@ std::shared_ptr<const ContextAccess> Context::getAccess() const
return access ? access : ContextAccess::getFullAccess();
}
ASTPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
{
auto lock = getLock();
auto row_filter_of_initial_user = row_policies_of_initial_user ? row_policies_of_initial_user->getFilter(database, table_name, filter_type) : nullptr;
RowPolicyFilterPtr row_filter_of_initial_user;
if (row_policies_of_initial_user)
row_filter_of_initial_user = row_policies_of_initial_user->getFilter(database, table_name, filter_type);
return getAccess()->getRowPolicyFilter(database, table_name, filter_type, row_filter_of_initial_user);
}
@ -1227,7 +1231,7 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String
}
StoragePtr Context::executeTableFunction(const ASTPtr & table_expression)
StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint)
{
auto hash = table_expression->getTreeHash();
String key = toString(hash.first) + '_' + toString(hash.second);
@ -1237,15 +1241,61 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression)
if (!res)
{
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this());
if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint())
if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
{
const auto & insertion_table = getInsertionTable();
if (!insertion_table.empty())
const auto & structure_hint = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
bool use_columns_from_insert_query = true;
/// use_structure_from_insertion_table_in_table_functions=2 means `auto`
if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2)
{
const auto & structure_hint
= DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns;
table_function_ptr->setStructureHint(structure_hint);
const auto * expression_list = select_query_hint->select()->as<ASTExpressionList>();
Names columns_names;
bool have_asterisk = false;
/// First, check if we have only identifiers, asterisks and literals in the select expression,
/// and if not, we cannot use the structure from the insertion table.
for (const auto & expression : expression_list->children)
{
if (auto * identifier = expression->as<ASTIdentifier>())
{
columns_names.push_back(identifier->name());
}
else if (expression->as<ASTAsterisk>())
{
have_asterisk = true;
}
else if (!expression->as<ASTLiteral>())
{
use_columns_from_insert_query = false;
break;
}
}
/// Check that all identifiers are column names from insertion table.
for (const auto & column_name : columns_names)
{
if (!structure_hint.has(column_name))
{
use_columns_from_insert_query = false;
break;
}
}
/// If we don't have an asterisk but only a subset of columns, we should use the
/// structure from the insertion table only when the table function
/// supports reading a subset of columns from the data.
if (use_columns_from_insert_query && !have_asterisk && !columns_names.empty())
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(getInsertFormat());
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
}
}
if (use_columns_from_insert_query)
table_function_ptr->setStructureHint(structure_hint);
}
res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
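
Stripped of the AST types, the vetting above is a single classification pass over the SELECT list plus two follow-up checks. A detached sketch with the AST replaced by a plain enum (every name here is illustrative, not ClickHouse API):

```cpp
#include <string>
#include <unordered_set>
#include <vector>

enum class Expr { Identifier, Asterisk, Literal, Other };

struct SelectItem
{
    Expr kind;
    std::string name;  // meaningful only for Expr::Identifier
};

// Use the insertion table's structure only when the SELECT list consists of
// identifiers known to the insertion table, asterisks, and literals.
bool canUseInsertionTableStructure(
    const std::vector<SelectItem> & select_list,
    const std::unordered_set<std::string> & insertion_columns,
    bool supports_reading_subset)
{
    bool have_asterisk = false;
    std::vector<std::string> columns;

    for (const auto & item : select_list)
    {
        if (item.kind == Expr::Identifier)
            columns.push_back(item.name);
        else if (item.kind == Expr::Asterisk)
            have_asterisk = true;
        else if (item.kind != Expr::Literal)
            return false;  // arbitrary expressions defeat the structure hint
    }

    for (const auto & name : columns)
        if (!insertion_columns.count(name))
            return false;  // identifier that is not an insertion-table column

    // A strict subset of columns only works if the table function (or the input
    // format, for the `input` function) can read a subset of columns.
    if (!have_asterisk && !columns.empty() && !supports_reading_subset)
        return false;

    return true;
}

int main()
{
    std::unordered_set<std::string> cols = {"id", "name"};
    // Corresponds to e.g. INSERT ... SELECT id, 42 FROM file(...).
    std::vector<SelectItem> sel = {{Expr::Identifier, "id"}, {Expr::Literal, {}}};
    return canUseInsertionTableStructure(sel, cols, /*supports_reading_subset=*/true) ? 0 : 1;
}
```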
@ -1482,12 +1532,21 @@ String Context::getDefaultFormat() const
return default_format.empty() ? "TabSeparated" : default_format;
}
void Context::setDefaultFormat(const String & name)
{
default_format = name;
}
String Context::getInsertFormat() const
{
return insert_format;
}
void Context::setInsertFormat(const String & name)
{
insert_format = name;
}
MultiVersion<Macros>::Version Context::getMacros() const
{
return shared->macros.get();

View File

@ -9,6 +9,7 @@
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/MergeTreeTransactionHolder.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/ASTSelectQuery.h>
#include <Storages/IStorage_fwd.h>
#include <Common/MultiVersion.h>
#include <Common/OpenTelemetryTraceContext.h>
@ -45,6 +46,8 @@ struct User;
using UserPtr = std::shared_ptr<const User>;
struct EnabledRolesInfo;
class EnabledRowPolicies;
struct RowPolicyFilter;
using RowPolicyFilterPtr = std::shared_ptr<const RowPolicyFilter>;
class EnabledQuota;
struct QuotaUsage;
class AccessFlags;
@ -237,6 +240,9 @@ private:
String default_format; /// Format, used when server formats data by itself and if query does not have FORMAT specification.
/// Thus, used in HTTP interface. If not specified - then some globally default format is used.
String insert_format; /// Format, used in insert query.
TemporaryTablesMapping external_tables_mapping;
Scalars scalars;
/// Used to store constant values which are different on each instance during distributed plan, such as _shard_num.
@ -516,7 +522,7 @@ public:
std::shared_ptr<const ContextAccess> getAccess() const;
ASTPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
/// Finds and sets extra row policies to be used based on `client_info.initial_user`,
/// if the initial user exists.
@ -600,7 +606,9 @@ public:
const QueryFactoriesInfo & getQueryFactoriesInfo() const { return query_factories_info; }
void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const;
StoragePtr executeTableFunction(const ASTPtr & table_expression);
/// For table functions s3/file/url/hdfs/input we can use the structure from the
/// insertion table, depending on the select expression.
StoragePtr executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint = nullptr);
void addViewSource(const StoragePtr & storage);
StoragePtr getViewSource() const;
@ -629,6 +637,9 @@ public:
String getDefaultFormat() const; /// If default_format is not specified, some global default format is returned.
void setDefaultFormat(const String & name);
String getInsertFormat() const;
void setInsertFormat(const String & name);
MultiVersion<Macros>::Version getMacros() const;
void setMacros(std::unique_ptr<Macros> && macros);

View File

@ -1944,7 +1944,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
}
optimize_read_in_order =
settings.optimize_read_in_order
settings.optimize_read_in_order && (!settings.query_plan_read_in_order)
&& storage
&& query.orderBy()
&& !query_analyzer.hasAggregation()
@ -1952,13 +1952,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
&& !query.final()
&& join_allow_read_in_order;
if (storage && optimize_read_in_order)
{
Names columns_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey();
additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(),
columns_for_sorting_key.begin(), columns_for_sorting_key.end());
}
/// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers.
query_analyzer.appendSelect(chain, only_types || (need_aggregate ? !second_stage : !first_stage));

View File

@ -6,21 +6,21 @@
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/TreeRewriter.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/FilterStep.h>
namespace DB
{
void IInterpreterUnionOrSelectQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const
void IInterpreterUnionOrSelectQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr /*context_*/) const
{
elem.query_kind = "Select";
}
QueryPipelineBuilder IInterpreterUnionOrSelectQuery::buildQueryPipeline()
{
QueryPlan query_plan;

View File

@ -44,7 +44,7 @@ public:
size_t getMaxStreams() const { return max_streams; }
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const override;
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context) const override;
/// Returns whether the query uses the view source from the Context
/// The view source is a virtual storage that currently only materialized views use to replace the source table
@ -58,6 +58,8 @@ public:
/// Add limits from external query.
void addStorageLimits(const StorageLimitsList & limits);
ContextPtr getContext() const { return context; }
protected:
ASTPtr query_ptr;
ContextMutablePtr context;

View File

@ -419,19 +419,23 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
auto settings = checkAndGetSettings<QueryPlanSettings>(ast.getSettings());
QueryPlan plan;
ContextPtr context;
if (getContext()->getSettingsRef().allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext());
context = interpreter.getContext();
plan = std::move(interpreter).extractQueryPlan();
}
else
{
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options);
interpreter.buildQueryPlan(plan);
context = interpreter.getContext();
}
if (settings.optimize)
plan.optimize(QueryPlanOptimizationSettings::fromContext(getContext()));
plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
if (settings.json)
{
@ -461,21 +465,24 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
{
auto settings = checkAndGetSettings<QueryPipelineSettings>(ast.getSettings());
QueryPlan plan;
ContextPtr context;
if (getContext()->getSettingsRef().allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter(ast.getExplainedQuery(), options, getContext());
context = interpreter.getContext();
plan = std::move(interpreter).extractQueryPlan();
}
else
{
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), options);
interpreter.buildQueryPlan(plan);
context = interpreter.getContext();
}
auto pipeline = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(getContext()),
BuildQueryPipelineSettings::fromContext(getContext()));
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
if (settings.graph)
{
@ -511,16 +518,18 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
auto settings = checkAndGetSettings<QueryPlanSettings>(ast.getSettings());
QueryPlan plan;
ContextPtr context;
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions());
interpreter.buildQueryPlan(plan);
context = interpreter.getContext();
// collect the selected marks, rows, parts during build query pipeline.
plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(getContext()),
BuildQueryPipelineSettings::fromContext(getContext()));
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
if (settings.optimize)
plan.optimize(QueryPlanOptimizationSettings::fromContext(getContext()));
plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
plan.explainEstimate(res_columns);
insert_buf = false;
break;

View File

@ -1,7 +1,10 @@
#include <Access/AccessControl.h>
#include <Columns/getLeastSuperColumn.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterSelectIntersectExceptQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/QueryLog.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Processors/QueryPlan/DistinctStep.h>
@ -188,4 +191,25 @@ void InterpreterSelectIntersectExceptQuery::ignoreWithTotals()
interpreter->ignoreWithTotals();
}
void InterpreterSelectIntersectExceptQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr /*context_*/) const
{
elem.query_kind = "Select";
for (const auto & interpreter : nested_interpreters)
{
if (const auto * select_interpreter = dynamic_cast<const InterpreterSelectQuery *>(interpreter.get()))
{
auto filter = select_interpreter->getRowPolicyFilter();
if (filter)
{
for (const auto & row_policy : filter->policies)
{
auto name = row_policy->getFullName().toString();
elem.used_row_policies.emplace(std::move(name));
}
}
}
}
}
}

View File

@ -35,6 +35,8 @@ public:
void ignoreWithTotals() override;
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context) const override;
private:
static String getName() { return "SelectIntersectExceptQuery"; }

View File

@ -1,3 +1,5 @@
#include <Access/AccessControl.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeInterval.h>
@ -33,6 +35,7 @@
#include <Interpreters/JoinedTables.h>
#include <Interpreters/OpenTelemetrySpanLog.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <Interpreters/QueryLog.h>
#include <Interpreters/replaceAliasColumnsInQuery.h>
#include <Interpreters/RewriteCountDistinctVisitor.h>
@ -112,7 +115,7 @@ namespace ErrorCodes
/// Assumes `storage` is set and the table filter (row-level security) is not empty.
FilterDAGInfoPtr generateFilterActions(
const StorageID & table_id,
const ASTPtr & row_policy_filter,
const ASTPtr & row_policy_filter_expression,
const ContextPtr & context,
const StoragePtr & storage,
const StorageSnapshotPtr & storage_snapshot,
@ -133,9 +136,9 @@ FilterDAGInfoPtr generateFilterActions(
auto expr_list = select_ast->select();
/// The first column is our filter expression.
/// the row_policy_filter should be cloned, because it may be changed by TreeRewriter.
/// the row_policy_filter_expression should be cloned, because it may be changed by TreeRewriter.
/// which can make it an invalid expression on its own, although it is valid in the whole select.
expr_list->children.push_back(row_policy_filter->clone());
expr_list->children.push_back(row_policy_filter_expression->clone());
/// Keep columns that are required after the filter actions.
for (const auto & column_str : prerequisite_columns)
@ -613,13 +616,13 @@ InterpreterSelectQuery::InterpreterSelectQuery(
query_info.filter_asts.clear();
/// Fix source_header for filter actions.
if (row_policy_filter)
if (row_policy_filter && !row_policy_filter->empty())
{
filter_info = generateFilterActions(
table_id, row_policy_filter, context, storage, storage_snapshot, metadata_snapshot, required_columns,
table_id, row_policy_filter->expression, context, storage, storage_snapshot, metadata_snapshot, required_columns,
prepared_sets);
query_info.filter_asts.push_back(row_policy_filter);
query_info.filter_asts.push_back(row_policy_filter->expression);
}
if (query_info.additional_filter_ast)
@ -1447,17 +1450,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
for (const auto & key_name : key_names)
order_descr.emplace_back(key_name);
SortingStep::Settings sort_settings(*context);
auto sorting_step = std::make_unique<SortingStep>(
plan.getCurrentDataStream(),
std::move(order_descr),
settings.max_block_size,
0 /* LIMIT */,
SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode),
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
this->context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
0 /* LIMIT */, sort_settings,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_pos));
plan.addStep(std::move(sorting_step));
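
The shrinking constructor calls in this file follow the parameter-object refactor: the sort knobs that were threaded through one by one now travel behind a single `SortingStep::Settings` built from the Context. A generic sketch of the before/after shape (member names below are assumptions for illustration, not the real ClickHouse fields):

```cpp
#include <cstddef>
#include <utility>

// Hypothetical settings bag in the spirit of SortingStep::Settings.
struct SortSettings
{
    std::size_t max_block_size = 65536;
    std::size_t max_bytes_before_external_sort = 0;
    double remerge_lowered_memory_ratio = 2.0;
    // ... the remaining knobs the old constructor took individually.
};

class SortingStep
{
public:
    // After the refactor: one settings object instead of ~7 loose parameters.
    SortingStep(std::size_t limit_, SortSettings settings_)
        : limit(limit_), settings(std::move(settings_)) {}

private:
    std::size_t limit;
    SortSettings settings;
};

int main()
{
    SortSettings settings;  // built once, e.g. from the Context
    SortingStep step(/*limit=*/0, settings);
    (void) step;
}
```

The win shows up at every call site in the diff: adding a new sort setting no longer touches `InterpreterSelectQuery` at all.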
@ -1866,6 +1864,22 @@ void InterpreterSelectQuery::setProperClientInfo(size_t replica_num, size_t repl
context->getClientInfo().number_of_current_replica = replica_num;
}
RowPolicyFilterPtr InterpreterSelectQuery::getRowPolicyFilter() const
{
return row_policy_filter;
}
void InterpreterSelectQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr /*context_*/) const
{
elem.query_kind = "Select";
for (const auto & row_policy : row_policy_filter->policies)
{
auto name = row_policy->getFullName().toString();
elem.used_row_policies.emplace(std::move(name));
}
}
bool InterpreterSelectQuery::shouldMoveToPrewhere()
{
const Settings & settings = context->getSettingsRef();
@ -2617,17 +2631,13 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
// happens in case of `over ()`.
if (!window.full_sort_description.empty() && (i == 0 || !sortIsPrefix(window, *windows_sorted[i - 1])))
{
SortingStep::Settings sort_settings(*context);
auto sorting_step = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(),
window.full_sort_description,
settings.max_block_size,
0 /* LIMIT */,
SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode),
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
sort_settings,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription("Sorting for window '" + window.window_name + "'");
query_plan.addStep(std::move(sorting_step));
@ -2675,18 +2685,14 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
const Settings & settings = context->getSettingsRef();
SortingStep::Settings sort_settings(*context);
/// Merge the sorted blocks.
auto sorting_step = std::make_unique<SortingStep>(
query_plan.getCurrentDataStream(),
output_order_descr,
settings.max_block_size,
limit,
SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode),
settings.max_bytes_before_remerge_sort,
settings.remerge_sort_lowered_memory_bytes_ratio,
settings.max_bytes_before_external_sort,
context->getTempDataOnDisk(),
settings.min_free_disk_space_for_temporary_data,
sort_settings,
settings.optimize_sorting_by_input_stream_properties);
sorting_step->setStepDescription("Sorting for ORDER BY");

View File

@ -2,6 +2,7 @@
#include <memory>
#include <Access/EnabledRowPolicies.h>
#include <Core/QueryProcessingStage.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
@ -23,6 +24,7 @@ class Logger;
namespace DB
{
class SubqueryForSet;
class InterpreterSelectWithUnionQuery;
class Context;
@ -34,6 +36,9 @@ using GroupingSetsParamsList = std::vector<GroupingSetsParams>;
struct TreeRewriterResult;
using TreeRewriterResultPtr = std::shared_ptr<const TreeRewriterResult>;
struct RowPolicy;
using RowPolicyPtr = std::shared_ptr<const RowPolicy>;
/** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage.
*/
@ -129,6 +134,10 @@ public:
FilterDAGInfoPtr getAdditionalQueryInfo() const { return additional_filter_info; }
RowPolicyFilterPtr getRowPolicyFilter() const;
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context) const override;
static SortDescription getSortDescription(const ASTSelectQuery & query, const ContextPtr & context);
static UInt64 getLimitForSorting(const ASTSelectQuery & query, const ContextPtr & context);
@ -209,7 +218,7 @@ private:
/// Is calculated in getSampleBlock. Is used later in readImpl.
ExpressionAnalysisResult analysis_result;
/// For row-level security.
ASTPtr row_policy_filter;
RowPolicyFilterPtr row_policy_filter;
FilterDAGInfoPtr filter_info;
/// For additional_filter setting.

View File

@ -1,8 +1,11 @@
#include <Access/AccessControl.h>
#include <Columns/getLeastSuperColumn.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/InterpreterSelectIntersectExceptQuery.h>
#include <Interpreters/QueryLog.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
@ -22,6 +25,7 @@
#include <algorithm>
namespace DB
{
@ -382,4 +386,25 @@ void InterpreterSelectWithUnionQuery::ignoreWithTotals()
interpreter->ignoreWithTotals();
}
void InterpreterSelectWithUnionQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr /*context_*/) const
{
elem.query_kind = "Select";
for (const auto & interpreter : nested_interpreters)
{
if (const auto * select_interpreter = dynamic_cast<const InterpreterSelectQuery *>(interpreter.get()))
{
auto filter = select_interpreter->getRowPolicyFilter();
if (filter)
{
for (const auto & row_policy : filter->policies)
{
auto name = row_policy->getFullName().toString();
elem.used_row_policies.emplace(std::move(name));
}
}
}
}
}
}

View File

@ -47,6 +47,8 @@ public:
bool supportsTransactions() const override { return true; }
void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr context) const override;
private:
std::vector<std::unique_ptr<IInterpreterUnionOrSelectQuery>> nested_interpreters;

View File

@ -173,12 +173,13 @@ using RenameQualifiedIdentifiersVisitor = InDepthNodeVisitor<RenameQualifiedIden
}
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query, bool include_all_columns_)
JoinedTables::JoinedTables(ContextPtr context_, const ASTSelectQuery & select_query_, bool include_all_columns_)
: context(context_)
, table_expressions(getTableExpressions(select_query))
, table_expressions(getTableExpressions(select_query_))
, include_all_columns(include_all_columns_)
, left_table_expression(extractTableExpression(select_query, 0))
, left_db_and_table(getDatabaseAndTable(select_query, 0))
, left_table_expression(extractTableExpression(select_query_, 0))
, left_db_and_table(getDatabaseAndTable(select_query_, 0))
, select_query(select_query_)
{}
bool JoinedTables::isLeftTableSubquery() const
@ -206,7 +207,7 @@ StoragePtr JoinedTables::getLeftTableStorage()
return {};
if (isLeftTableFunction())
return context->getQueryContext()->executeTableFunction(left_table_expression);
return context->getQueryContext()->executeTableFunction(left_table_expression, &select_query);
StorageID table_id = StorageID::createEmpty();
if (left_db_and_table)

View File

@ -22,7 +22,7 @@ using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class JoinedTables
{
public:
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query, bool include_all_columns_ = false);
JoinedTables(ContextPtr context, const ASTSelectQuery & select_query_, bool include_all_columns_ = false);
void reset(const ASTSelectQuery & select_query);
@ -52,6 +52,7 @@ private:
/// Legacy (duplicated left table values)
ASTPtr left_table_expression;
std::optional<DatabaseAndTableWithAlias> left_db_and_table;
const ASTSelectQuery & select_query;
};
}
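
JoinedTables now stores `const ASTSelectQuery & select_query` so `getLeftTableStorage` can pass the SELECT down to `executeTableFunction` as a structure hint. A reference member is cheap, but it ties the holder's lifetime to the referenced AST, which is fine for a short-lived helper like this one. The constraint in miniature:

```cpp
struct Query { int id = 0; };

// Holding a reference member: the referenced object must outlive the holder.
class Tables
{
public:
    explicit Tables(const Query & query_) : query(query_) {}
    int queryId() const { return query.id; }

private:
    const Query & query;
};

int main()
{
    Query q{42};
    Tables t(q);  // safe: q outlives t in this scope
    return t.queryId() == 42 ? 0 : 1;
}
```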

View File

@ -31,8 +31,6 @@ PartLogElement::MergeReasonType PartLogElement::getMergeReasonType(MergeType mer
return TTL_DELETE_MERGE;
case MergeType::TTLRecompress:
return TTL_RECOMPRESS_MERGE;
case MergeType::TTLDrop:
return TTL_DROP_MERGE;
}
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown MergeType {}", static_cast<UInt64>(merge_type));
@ -74,7 +72,6 @@ NamesAndTypesList PartLogElement::getNamesAndTypes()
{"RegularMerge", static_cast<Int8>(REGULAR_MERGE)},
{"TTLDeleteMerge", static_cast<Int8>(TTL_DELETE_MERGE)},
{"TTLRecompressMerge", static_cast<Int8>(TTL_RECOMPRESS_MERGE)},
{"TTLDropMerge", static_cast<Int8>(TTL_DROP_MERGE)},
}
);

View File

@ -41,8 +41,6 @@ struct PartLogElement
TTL_DELETE_MERGE = 3,
/// Merge with recompression
TTL_RECOMPRESS_MERGE = 4,
/// Merge assigned to drop parts (with TTLMergeSelector)
TTL_DROP_MERGE = 5,
};
String query_id;

View File

@ -1,4 +1,5 @@
#include <array>
#include <Interpreters/QueryLog.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
@ -13,15 +14,18 @@
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/ProfileEventsExt.h>
#include <Interpreters/QueryLog.h>
#include <Poco/Net/IPAddress.h>
#include <Common/ClickHouseRevision.h>
#include <Common/IPv6ToBinary.h>
#include <Common/ProfileEvents.h>
#include <Common/typeid_cast.h>
#include <Poco/Net/IPAddress.h>
#include <array>
namespace DB
{
@ -119,6 +123,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"used_storages", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_table_functions", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"used_row_policies", std::make_shared<DataTypeArray>(std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()))},
{"transaction_id", getTransactionIDDataType()},
};
@ -236,13 +242,14 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
auto & column_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_storage_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_table_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
auto & column_row_policies_names = typeid_cast<ColumnArray &>(*columns[i++]);
auto fill_column = [](const std::unordered_set<String> & data, ColumnArray & column)
auto fill_column = [](const auto & data, ColumnArray & column)
{
size_t size = 0;
for (const auto & name : data)
for (const auto & value : data)
{
column.getData().insertData(name.data(), name.size());
column.getData().insert(value);
++size;
}
auto & offsets = column.getOffsets();
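
Switching the parameter to `const auto &` turns `fill_column` into a generic lambda, so one body serves both the existing `std::unordered_set<String>` members and the new `std::set<String>` of row policy names; the switch from `insertData` to `insert` likewise keeps the helper agnostic to the concrete column type. The idiom in isolation:

```cpp
#include <iostream>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

int main()
{
    std::vector<std::string> column;

    // Generic lambda: the body is instantiated per container type at each call.
    auto fill_column = [&column](const auto & data)
    {
        for (const auto & value : data)
            column.push_back(value);
    };

    std::unordered_set<std::string> used_functions = {"factorial", "plus"};
    std::set<std::string> used_row_policies = {"policy_a ON db.table"};

    fill_column(used_functions);     // unordered container
    fill_column(used_row_policies);  // ordered container, same lambda
    std::cout << column.size() << '\n';
}
```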
@ -258,6 +265,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
fill_column(used_functions, column_function_factory_objects);
fill_column(used_storages, column_storage_factory_objects);
fill_column(used_table_functions, column_table_function_factory_objects);
fill_column(used_row_policies, column_row_policies_names);
}
columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id});

View File

@ -7,6 +7,7 @@
#include <Interpreters/ClientInfo.h>
#include <Interpreters/TransactionVersionMetadata.h>
namespace ProfileEvents
{
class Counters;
@ -73,6 +74,7 @@ struct QueryLogElement
std::unordered_set<String> used_functions;
std::unordered_set<String> used_storages;
std::unordered_set<String> used_table_functions;
std::set<String> used_row_policies;
Int32 exception_code{}; // because ErrorCodes are int
String exception;

View File

@ -451,6 +451,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
}
else if (auto * insert_query = ast->as<ASTInsertQuery>())
{
context->setInsertFormat(insert_query->format);
if (insert_query->settings_ast)
InterpreterSetQuery(insert_query->settings_ast, context).executeForCurrentContext();
insert_query->tail = istr;
@ -530,7 +531,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
insert_query->tryFindInputFunction(input_function);
if (input_function)
{
StoragePtr storage = context->executeTableFunction(input_function);
StoragePtr storage = context->executeTableFunction(input_function, insert_query->select->as<ASTSelectQuery>());
auto & input_storage = dynamic_cast<StorageInput &>(*storage);
auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr();
auto pipe = getSourceFromASTInsertQuery(

View File

@ -164,7 +164,7 @@ namespace
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(*storage.engine, data, 4);
}
else if (engine_name == "S3" || engine_name == "COSN")
else if (engine_name == "S3" || engine_name == "COSN" || engine_name == "OSS")
{
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
wipePasswordFromS3TableEngineArguments(*storage.engine, data);
@ -222,7 +222,7 @@ namespace
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
wipePasswordFromArgument(function, data, 4);
}
else if (function.name == "s3" || function.name == "cosn")
else if (function.name == "s3" || function.name == "cosn" || function.name == "oss")
{
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
wipePasswordFromS3FunctionArguments(function, data, /* is_cluster_function= */ false);
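
Both branches delegate to positional wiping: each engine or function knows which argument index holds a secret. A minimal sketch of that idea over plain strings (the real helpers rewrite AST argument nodes, not strings, and the S3 variants handle several signatures):

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Replace the argument at `index` with a placeholder, if present.
void wipeArgument(std::vector<std::string> & args, std::size_t index)
{
    if (index < args.size())
        args[index] = "'[HIDDEN]'";
}

int main()
{
    // s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
    std::vector<std::string> args = {"'https://bucket/key'", "'AKIA...'", "'secret'", "'CSV'"};
    wipeArgument(args, 2);  // hide the secret key
    for (const auto & a : args)
        std::cout << a << ' ';
    std::cout << '\n';
}
```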

Some files were not shown because too many files have changed in this diff