Mirror of https://github.com/ClickHouse/ClickHouse.git

Commit 30cc705464: Merge branch 'master' of github.com:ClickHouse/ClickHouse into interactive-mode-for-clickhouse-local
@ -183,6 +183,10 @@ for conn_index, c in enumerate(all_connections):
     # requires clickhouse-driver >= 1.1.5 to accept arbitrary new settings
     # (https://github.com/mymarilyn/clickhouse-driver/pull/142)
     c.settings[s.tag] = s.text
+    # We have to perform a query to make sure the settings work. Otherwise an
+    # unknown setting will lead to a failing precondition check, and we will skip
+    # the test, which is wrong.
+    c.execute("select 1")

 reportStageEnd('settings')
@ -28,7 +28,7 @@ RUN apt-get update --yes \

 ENV PKG_VERSION="pvs-studio-latest"

 RUN set -x \
-    && export PUBKEY_HASHSUM="486a0694c7f92e96190bbfac01c3b5ac2cb7823981db510a28f744c99eabbbf17a7bcee53ca42dc6d84d4323c2742761" \
+    && export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
    && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
    && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
    && apt-key add /tmp/pubkey.txt \
@ -15,7 +15,7 @@ Supports table structure modifications (`ALTER TABLE ... ADD|DROP COLUMN`). If `

``` sql
 CREATE DATABASE test_database
-ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cache`]);
+ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use_table_cache`]);
```

**Engine Parameters**
@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
 - `database` — Remote database name.
 - `user` — PostgreSQL user.
 - `password` — User password.
+- `schema` — PostgreSQL schema.
 - `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`.

 ## Data Types Support {#data_types-support}
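As an illustration of the new `schema` parameter, here is a minimal sketch of a CREATE DATABASE call that targets a specific PostgreSQL schema with table caching enabled (the host, database, and schema names are hypothetical):

```sql
CREATE DATABASE pg_db
ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'user', 'password', 'my_schema', 1);
```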
@ -82,6 +82,7 @@ The next 4 columns have a non-zero value only where there is an active session w
 - `absolute_delay` (`UInt64`) - How big a lag in seconds the current replica has.
 - `total_replicas` (`UInt8`) - The total number of known replicas of this table.
 - `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
+- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and whether the replica is active.

 If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
 If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly.
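For illustration, a minimal sketch of querying the new column (it works against whatever replicated tables you have):

```sql
SELECT database, table, replica_is_active
FROM system.replicas;
```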
@ -2138,3 +2138,52 @@ Result:

 - [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
+
+## currentProfiles {#current-profiles}
+
+Returns a list of the current [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
+
+The command [SET PROFILE](../../sql-reference/statements/set.md#query-set) can be used to change the current settings profile. If the command `SET PROFILE` was not used, the function returns the profiles specified in the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
+
+**Syntax**
+
+``` sql
+currentProfiles()
+```
+
+**Returned value**
+
+- List of the current user's settings profiles.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+## enabledProfiles {#enabled-profiles}
+
+Returns settings profiles assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as those returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
+
+**Syntax**
+
+``` sql
+enabledProfiles()
+```
+
+**Returned value**
+
+- List of the enabled settings profiles.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+## defaultProfiles {#default-profiles}
+
+Returns all the profiles specified in the current user's definition (see the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement).
+
+**Syntax**
+
+``` sql
+defaultProfiles()
+```
+
+**Returned value**
+
+- List of the default settings profiles.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
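A minimal usage sketch for the new functions; the profile name `web` is hypothetical and must exist on your server:

```sql
SET PROFILE 'web';
SELECT currentProfiles();
```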
@ -11,7 +11,7 @@ Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types

 **Syntax**

-``` sql
+```sql
 map(key1, value1[, key2, value2, ...])
 ```
@ -30,7 +30,7 @@ Type: [Map(key, value)](../../sql-reference/data-types/map.md).

 Query:

-``` sql
+```sql
 SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
 ```
@ -46,7 +46,7 @@ Result:

 Query:

-``` sql
+```sql
 CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a;
 INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3);
 SELECT a['key2'] FROM table_map;
@ -54,7 +54,7 @@ SELECT a['key2'] FROM table_map;

 Result:

-``` text
+```text
 ┌─arrayElement(a, 'key2')─┐
 │                       0 │
 │                       2 │
@ -72,7 +72,7 @@ Collect all the keys and sum corresponding values.

 **Syntax**

-``` sql
+```sql
 mapAdd(arg1, arg2 [, ...])
 ```
@ -88,13 +88,13 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq

 Query with a tuple map:

-``` sql
+```sql
 SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type;
 ```

 Result:

-``` text
+```text
 ┌─res───────────┬─type───────────────────────────────┐
 │ ([1,2],[2,2]) │ Tuple(Array(UInt8), Array(UInt64)) │
 └───────────────┴────────────────────────────────────┘
@ -102,7 +102,16 @@ Result:

 Query with `Map` type:

-``` sql
+```sql
 SELECT mapAdd(map(1,1), map(1,1));
 ```

 Result:

 ```text
 ┌─mapAdd(map(1, 1), map(1, 1))─┐
 │ {1:2}                        │
 └──────────────────────────────┘
 ```

 ## mapSubtract {#function-mapsubtract}
@ -111,21 +120,21 @@ Collect all the keys and subtract corresponding values.

 **Syntax**

-``` sql
+```sql
 mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
 ```

 **Arguments**

-Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
+Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.

 **Returned value**

-- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments, returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.

 **Example**

-Query:
+Query with a tuple map:

 ```sql
 SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt32(2), 1])) as res, toTypeName(res) as type;
@ -139,32 +148,54 @@ Result:
 └────────────────┴───────────────────────────────────┘
 ```

+Query with `Map` type:
+
+```sql
+SELECT mapSubtract(map(1,1), map(1,1));
+```
+
+Result:
+
+```text
+┌─mapSubtract(map(1, 1), map(1, 1))─┐
+│ {1:0}                             │
+└───────────────────────────────────┘
+```
+
 ## mapPopulateSeries {#function-mappopulateseries}

 Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
+Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
+
+For array arguments the number of elements in `keys` and `values` must be the same for each row.

 **Syntax**

-``` sql
+```sql
 mapPopulateSeries(keys, values[, max])
+mapPopulateSeries(map[, max])
 ```

-Generates a map, where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from `keys` array with a step size of one, and corresponding values taken from `values` array. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
-
-The number of elements in `keys` and `values` must be the same for each row.
+Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers from the minimum to the maximum key (or the `max` argument, if specified) taken from the map with a step size of one, and the corresponding values. If no value is specified for a key, a default value is used in the resulting map. For repeated keys, only the first value (in order of appearance) gets associated with the key.

 **Arguments**

+Mapped arrays:
+
 - `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
 - `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
+
+or
+
+- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md).

 **Returned value**

-- Returns a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
+- Depending on the arguments, returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and the values corresponding to the keys.

 **Example**

-Query:
+Query with mapped arrays:

 ```sql
 select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
@ -178,13 +209,27 @@ Result:
 └──────────────────────────────┴───────────────────────────────────┘
 ```

+Query with `Map` type:
+
+```sql
+SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
+```
+
+Result:
+
+```text
+┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
+│ {1:10,2:0,3:0,4:0,5:20,6:0}             │
+└─────────────────────────────────────────┘
+```
+
 ## mapContains {#mapcontains}

 Determines whether the `map` contains the `key` parameter.

 **Syntax**

-``` sql
+```sql
 mapContains(map, key)
 ```
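For reference, a minimal sketch of calling the function; it returns `1` when the key is present and `0` otherwise:

```sql
SELECT mapContains(map('a', 1, 'b', 2), 'a');
```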
@ -5,9 +5,6 @@ toc_title: Window Functions

 # [experimental] Window Functions

-!!! warning "Warning"
-    This is an experimental feature that is currently in development and is not ready for general use. It will change in unpredictable backwards-incompatible ways in the future releases. Set `allow_experimental_window_functions = 1` to enable it.
-
 ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:

 | Feature | Support or workaround |
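For context, a minimal sketch of how the feature was enabled while the removed warning still applied:

```sql
SET allow_experimental_window_functions = 1;
SELECT number, sum(number) OVER (ORDER BY number) AS running_total FROM numbers(5);
```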
@ -134,7 +134,7 @@ default
 - `regexp` – the metric name pattern.
 - `age` – the minimum age of the data, in seconds.
 - `precision` – how precisely to define the age of the data, in seconds. Must be a divisor of 86400 (the number of seconds in a day).
-- `function` – the name of the aggregate function to apply to data whose age falls within the range `[age, age + precision]`.
+- `function` – the name of the aggregate function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min/max/any/avg. The average is calculated imprecisely, as the average of averages.

 ### Configuration Example {#configuration-example}
@ -171,3 +171,6 @@ default
 </graphite_rollup>
 ```
+
+!!! warning "Warning"
+    Data rollup is performed during merges. Merges are usually not triggered for old partitions, so for rollup to happen you have to trigger an unscheduled merge with [optimize](../../../sql-reference/statements/optimize/), or use additional tools such as [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
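A minimal sketch of triggering such an unscheduled merge (the table name is hypothetical):

```sql
OPTIMIZE TABLE graphite_table FINAL;
```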
@ -2088,3 +2088,52 @@ SELECT tcpPort();

 - [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port)
+
+## currentProfiles {#current-profiles}
+
+Returns a list of [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
+
+The SET PROFILE command can be used to change the current settings profile. If the `SET PROFILE` command was not applied, the function returns the profiles specified in the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
+
+**Syntax**
+
+``` sql
+currentProfiles()
+```
+
+**Returned value**
+
+- List of settings profiles for the current user.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+## enabledProfiles {#enabled-profiles}
+
+Returns the settings profiles assigned to the user both explicitly and implicitly. Explicitly assigned profiles are the same profiles returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles; profiles assigned via granted roles; profiles assigned via their own settings; and the main default profile (see the `default_profile` section in the main server configuration file).
+
+**Syntax**
+
+``` sql
+enabledProfiles()
+```
+
+**Returned value**
+
+- List of profiles enabled for the current user.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+## defaultProfiles {#default-profiles}
+
+Returns all the profiles specified in the current user's definition (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
+
+**Syntax**
+
+``` sql
+defaultProfiles()
+```
+
+**Returned value**
+
+- List of the default profiles.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
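A minimal usage sketch showing all three functions at once (output depends on your access-control configuration):

```sql
SELECT currentProfiles(), enabledProfiles(), defaultProfiles();
```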
@ -781,7 +781,12 @@ bool Client::processWithFuzzing(const String & full_query)
                 "Error while reconnecting to the server: {}\n",
                 getCurrentExceptionMessage(true));

-            assert(!connection->isConnected());
+            // The reconnection might fail, but we'll still be connected
+            // in the sense of `connection->isConnected() = true`,
+            // in case when the requested database doesn't exist.
+            // Disconnect manually now, so that the following code doesn't
+            // have any doubts, and the connection state is predictable.
+            connection->disconnect();
         }
     }
@ -1430,8 +1435,11 @@ void Client::initBlockOutputStream(const Block & block, ASTPtr parsed_query)
         const auto & out_file_node = query_with_output->out_file->as<ASTLiteral &>();
         const auto & out_file = out_file_node.value.safeGet<std::string>();

-        out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
-        out_buf = &*out_file_buf;
+        out_file_buf = wrapWriteBufferWithCompressionMethod(
+            std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
+            chooseCompressionMethod(out_file, ""),
+            /* compression level = */ 3
+        );

         // We are writing to file, so default format is the same as in non-interactive mode.
         if (is_interactive && is_default_format)
@ -1451,9 +1459,9 @@ void Client::initBlockOutputStream(const Block & block, ASTPtr parsed_query)

         /// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
         if (!need_render_progress)
-            block_out_stream = global_context->getOutputStreamParallelIfPossible(current_format, *out_buf, block);
+            block_out_stream = global_context->getOutputStreamParallelIfPossible(current_format, out_file_buf ? *out_file_buf : *out_buf, block);
         else
-            block_out_stream = global_context->getOutputStream(current_format, *out_buf, block);
+            block_out_stream = global_context->getOutputStream(current_format, out_file_buf ? *out_file_buf : *out_buf, block);

         block_out_stream->writePrefix();
     }
@ -361,23 +361,22 @@

 function postImpl(posted_request_num, query)
 {
     /// TODO: Check if URL already contains query string (append parameters).

-    let user = document.getElementById('user').value;
-    let password = document.getElementById('password').value;
+    const user = document.getElementById('user').value;
+    const password = document.getElementById('password').value;

-    let server_address = document.getElementById('url').value;
+    const server_address = document.getElementById('url').value;

-    let url = server_address +
+    const url = server_address +
+        (server_address.indexOf('?') >= 0 ? '&' : '?') +
         /// Ask server to allow cross-domain requests.
-        '?add_http_cors_header=1' +
+        'add_http_cors_header=1' +
         '&user=' + encodeURIComponent(user) +
         '&password=' + encodeURIComponent(password) +
         '&default_format=JSONCompact' +
         /// Safety settings to prevent results that browser cannot display.
         '&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';

-    let xhr = new XMLHttpRequest;
+    const xhr = new XMLHttpRequest;

     xhr.open('POST', url, true);
@ -391,12 +390,12 @@
         /// The query is saved in browser history (in state JSON object)
         /// as well as in URL fragment identifier.
         if (query != previous_query) {
-            let state = {
+            const state = {
                 query: query,
                 status: this.status,
                 response: this.response.length > 100000 ? null : this.response /// Lower than the browser's limit.
             };
-            let title = "ClickHouse Query: " + query;
+            const title = "ClickHouse Query: " + query;

             let history_url = window.location.pathname + '?user=' + encodeURIComponent(user);
             if (server_address != location.origin) {
@ -188,9 +188,11 @@ protected:
     /// Console output.
     WriteBufferFromFileDescriptor std_out{STDOUT_FILENO};
    std::unique_ptr<ShellCommand> pager_cmd;

     /// The user can specify to redirect query output to a file.
-    std::optional<WriteBufferFromFile> out_file_buf;
+    std::unique_ptr<WriteBuffer> out_file_buf;
     BlockOutputStreamPtr block_out_stream;

     /// The user could specify special file for server logs (stderr by default)
     std::unique_ptr<WriteBuffer> out_logs_buf;
     String server_logs_file;
@ -132,6 +132,10 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
         throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);

     UInt8 bytes_size = source[0];
+
+    if (bytes_size == 0)
+        throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
+
     UInt8 bytes_to_skip = uncompressed_size % bytes_size;

     if (UInt32(2 + bytes_to_skip) > source_size)
@ -502,6 +502,10 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
         throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);

     UInt8 bytes_size = source[0];
+
+    if (bytes_size == 0)
+        throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
+
     UInt8 bytes_to_skip = uncompressed_size % bytes_size;

     if (UInt32(2 + bytes_to_skip) > source_size)
@ -410,6 +410,10 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc
         throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);

     UInt8 bytes_size = source[0];
+
+    if (bytes_size == 0)
+        throw Exception("Cannot decompress. File has wrong header", ErrorCodes::CANNOT_DECOMPRESS);
+
     UInt8 bytes_to_skip = uncompressed_size % bytes_size;

     if (UInt32(2 + bytes_to_skip) > source_size)
@ -62,6 +62,7 @@ private:
 namespace ErrorCodes
 {
     extern const int CANNOT_COMPRESS;
+    extern const int CANNOT_DECOMPRESS;
     extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
     extern const int ILLEGAL_CODEC_PARAMETER;
 }
@ -93,7 +94,10 @@ UInt32 CompressionCodecLZ4::doCompressData(const char * source, UInt32 source_si

 void CompressionCodecLZ4::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
 {
-    LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
+    bool success = LZ4::decompress(source, dest, source_size, uncompressed_size, lz4_stat);
+
+    if (!success)
+        throw Exception("Cannot decompress", ErrorCodes::CANNOT_DECOMPRESS);
 }

 void registerCodecLZ4(CompressionCodecFactory & factory)
@ -412,13 +412,16 @@ template <> void inline copyOverlap<32, false>(UInt8 * op, const UInt8 *& match,
 /// See also https://stackoverflow.com/a/30669632

 template <size_t copy_amount, bool use_shuffle>
-void NO_INLINE decompressImpl(
+bool NO_INLINE decompressImpl(
     const char * const source,
     char * const dest,
+    size_t source_size,
     size_t dest_size)
 {
     const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
     UInt8 * op = reinterpret_cast<UInt8 *>(dest);
+    const UInt8 * const input_end = ip + source_size;
+    UInt8 * const output_begin = op;
     UInt8 * const output_end = op + dest_size;

     /// Unrolling with clang is doing >10% performance degrade.
@ -461,13 +464,19 @@ void NO_INLINE decompressImpl(
             /// output: xyzHello, w
             ///                  ^-op (we will overwrite excessive bytes on next iteration)

-            wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
+            {
+                auto * target = std::min(copy_end, output_end);
+                wildCopy<copy_amount>(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer.
+
+                if (target == output_end)
+                    return true;
+            }

             ip += length;
             op = copy_end;

-            if (copy_end >= output_end)
-                return;
+            if (unlikely(ip > input_end))
+                return false;

             /// Get match offset.
@ -475,6 +484,9 @@ void NO_INLINE decompressImpl(
             ip += 2;
             const UInt8 * match = op - offset;

+            if (unlikely(match < output_begin))
+                return false;
+
             /// Get match length.

             length = token & 0x0F;
@ -515,7 +527,10 @@ void NO_INLINE decompressImpl(

                 copy<copy_amount>(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer.
                 if (length > copy_amount * 2)
-                    wildCopy<copy_amount>(op + copy_amount, match + copy_amount, copy_end);
+                {
+                    auto * target = std::min(copy_end, output_end);
+                    wildCopy<copy_amount>(op + copy_amount, match + copy_amount, target);
+                }

                 op = copy_end;
             }
@ -524,7 +539,7 @@ void NO_INLINE decompressImpl(
 }


-void decompress(
+bool decompress(
     const char * const source,
     char * const dest,
     size_t source_size,
@ -532,7 +547,7 @@ void decompress(
     PerformanceStatistics & statistics [[maybe_unused]])
 {
     if (source_size == 0 || dest_size == 0)
-        return;
+        return true;

     /// Don't run timer if the block is too small.
     if (dest_size >= 32768)
/// Don't run timer if the block is too small.
|
||||
if (dest_size >= 32768)
|
||||
@ -542,24 +557,27 @@ void decompress(
|
||||
/// Run the selected method and measure time.
|
||||
|
||||
Stopwatch watch;
|
||||
bool success = true;
|
||||
if (best_variant == 0)
|
||||
decompressImpl<16, true>(source, dest, dest_size);
|
||||
success = decompressImpl<16, true>(source, dest, source_size, dest_size);
|
||||
if (best_variant == 1)
|
||||
decompressImpl<16, false>(source, dest, dest_size);
|
||||
success = decompressImpl<16, false>(source, dest, source_size, dest_size);
|
||||
if (best_variant == 2)
|
||||
decompressImpl<8, true>(source, dest, dest_size);
|
||||
success = decompressImpl<8, true>(source, dest, source_size, dest_size);
|
||||
if (best_variant == 3)
|
||||
decompressImpl<32, false>(source, dest, dest_size);
|
||||
success = decompressImpl<32, false>(source, dest, source_size, dest_size);
|
||||
|
||||
watch.stop();
|
||||
|
||||
/// Update performance statistics.
|
||||
|
||||
statistics.data[best_variant].update(watch.elapsedSeconds(), dest_size);
|
||||
|
||||
return success;
|
||||
}
|
||||
else
|
||||
{
|
||||
decompressImpl<8, false>(source, dest, dest_size);
|
||||
return decompressImpl<8, false>(source, dest, source_size, dest_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,14 +122,14 @@ struct PerformanceStatistics
         return choose_method;
     }

-    PerformanceStatistics() {}
-    PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
+    PerformanceStatistics() = default;
+    explicit PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {}
 };


 /** This method dispatch to one of different implementations depending on performance statistics.
   */
-void decompress(
+bool decompress(
     const char * const source,
     char * const dest,
     size_t source_size,
@ -449,7 +449,6 @@ class IColumn;
     M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
     M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
-    M(Bool, allow_experimental_map_type, true, "Obsolete setting, does nothing.", 0) \
     M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
     M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
     M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
     M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
@ -3,6 +3,7 @@
 #include <Common/ProfileEvents.h>
 #include <Common/CurrentThread.h>
 #include <IO/WriteHelpers.h>
+#include <Common/Stopwatch.h>
 #include <common/sleep.h>

 namespace ProfileEvents
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
     }
 }

-bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
+bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
 {
-    if (max_execution_time != 0
-        && elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
-        return handleOverflowMode(overflow_mode,
+    if (max_execution_time != 0)
+    {
+        auto elapsed_ns = stopwatch.elapsed();
+
+        if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
+            return handleOverflowMode(overflow_mode,
                 "Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
                 + " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
                 ErrorCodes::TIMEOUT_EXCEEDED);
+    }

     return true;
 }
@ -3,6 +3,7 @@
 #include <Poco/Timespan.h>
 #include <common/types.h>
 #include <DataStreams/SizeLimits.h>
+#include <Common/Stopwatch.h>

 namespace DB
 {
@ -25,7 +26,7 @@ public:
     /// Pause execution in case if speed limits were exceeded.
     void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;

-    bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
+    bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
 };

 }
@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)

 bool IBlockInputStream::checkTimeLimit() const
 {
-    return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
+    return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
 }
@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
     EXPECT_EQ(pipe.numOutputPorts(), 3);

     auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-            DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
+            DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);

     pipe.addTransform(std::move(transform));

@ -128,7 +128,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
     EXPECT_EQ(pipe.numOutputPorts(), 3);

     auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-            DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true);
+            DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true);

     pipe.addTransform(std::move(transform));
@ -103,9 +103,11 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
     const String & engine_name = engine_define->engine->name;
     const UUID & uuid = create.uuid;

-    bool engine_may_have_arguments = engine_name == "MySQL" || engine_name == "MaterializeMySQL" || engine_name == "MaterializedMySQL" ||
-                                     engine_name == "Lazy" || engine_name == "Replicated" || engine_name == "PostgreSQL" ||
-                                     engine_name == "MaterializedPostgreSQL" || engine_name == "SQLite";
+    static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
+        "Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite"};
+
+    bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);

     if (engine_define->engine->arguments && !engine_may_have_arguments)
         throw Exception("Database engine " + engine_name + " cannot have arguments", ErrorCodes::BAD_ARGUMENTS);

@ -113,6 +115,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
         engine_define->primary_key || engine_define->order_by ||
         engine_define->sample_by;
+    bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";

     if (has_unexpected_element || (!may_have_settings && engine_define->settings))
         throw Exception("Database engine " + engine_name + " cannot have parameters, primary_key, order_by, sample_by, settings",
             ErrorCodes::UNKNOWN_ELEMENT_IN_AST);
@ -233,11 +236,10 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
     {
         const ASTFunction * engine = engine_define->engine;

-        if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 5)
-            throw Exception(fmt::format(
-                "{} Database require host:port, database_name, username, password arguments "
-                "[, use_table_cache = 0].", engine_name),
-                ErrorCodes::BAD_ARGUMENTS);
+        if (!engine->arguments || engine->arguments->children.size() < 4 || engine->arguments->children.size() > 6)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "{} Database require `host:port`, `database_name`, `username`, `password` [, `schema` = "", `use_table_cache` = 0].",
+                engine_name);

         ASTs & engine_args = engine->arguments->children;
@ -249,9 +251,13 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
         const auto & username = safeGetLiteralValue<String>(engine_args[2], engine_name);
         const auto & password = safeGetLiteralValue<String>(engine_args[3], engine_name);

+        String schema;
+        if (engine->arguments->children.size() >= 5)
+            schema = safeGetLiteralValue<String>(engine_args[4], engine_name);
+
         auto use_table_cache = 0;
-        if (engine->arguments->children.size() == 5)
-            use_table_cache = safeGetLiteralValue<UInt64>(engine_args[4], engine_name);
+        if (engine->arguments->children.size() >= 6)
+            use_table_cache = safeGetLiteralValue<UInt8>(engine_args[5], engine_name);

         /// Split into replicas if needed.
         size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements;

@ -266,7 +272,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
             context->getSettingsRef().postgresql_connection_pool_wait_timeout);

         return std::make_shared<DatabasePostgreSQL>(
-            context, metadata_path, engine_define, database_name, postgres_database_name, connection_pool, use_table_cache);
+            context, metadata_path, engine_define, database_name, postgres_database_name, schema, connection_pool, use_table_cache);
     }
     else if (engine_name == "MaterializedPostgreSQL")
     {
|
||||
|
||||
if (!engine->arguments || engine->arguments->children.size() != 4)
|
||||
{
|
||||
throw Exception(
|
||||
fmt::format("{} Database require host:port, database_name, username, password arguments ", engine_name),
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"{} Database require `host:port`, `database_name`, `username`, `password`.",
|
||||
engine_name);
|
||||
}
|
||||
|
||||
ASTs & engine_args = engine->arguments->children;
|
||||
|
@ -525,7 +525,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
         query_context->getClientInfo().is_replicated_database_internal = true;
         query_context->setCurrentDatabase(database_name);
         query_context->setCurrentQueryId("");
-        auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false);
+        auto txn = std::make_shared<ZooKeeperMetadataTransaction>(current_zookeeper, zookeeper_path, false, "");
         query_context->initZooKeeperMetadataTransaction(txn);
         return query_context;
     };
@ -43,7 +43,7 @@ private:
     mutable std::mutex mutex;
     std::condition_variable wait_current_task_change;
     String current_task;
-    UInt32 logs_to_keep = std::numeric_limits<UInt32>::max();
+    std::atomic<UInt32> logs_to_keep = std::numeric_limits<UInt32>::max();
 };

 }
@ -39,14 +39,16 @@ DatabasePostgreSQL::DatabasePostgreSQL(
     const String & metadata_path_,
     const ASTStorage * database_engine_define_,
     const String & dbname_,
-    const String & postgres_dbname,
+    const String & postgres_dbname_,
+    const String & postgres_schema_,
     postgres::PoolWithFailoverPtr pool_,
     bool cache_tables_)
     : IDatabase(dbname_)
     , WithContext(context_->getGlobalContext())
     , metadata_path(metadata_path_)
     , database_engine_define(database_engine_define_->clone())
-    , dbname(postgres_dbname)
+    , postgres_dbname(postgres_dbname_)
+    , postgres_schema(postgres_schema_)
     , pool(std::move(pool_))
     , cache_tables(cache_tables_)
 {
@ -55,12 +57,28 @@ DatabasePostgreSQL::DatabasePostgreSQL(
 }


+String DatabasePostgreSQL::getTableNameForLogs(const String & table_name) const
+{
+    if (postgres_schema.empty())
+        return fmt::format("{}.{}", postgres_dbname, table_name);
+    return fmt::format("{}.{}.{}", postgres_dbname, postgres_schema, table_name);
+}
+
+
+String DatabasePostgreSQL::formatTableName(const String & table_name) const
+{
+    if (postgres_schema.empty())
+        return doubleQuoteString(table_name);
+    return fmt::format("{}.{}", doubleQuoteString(postgres_schema), doubleQuoteString(table_name));
+}
+
+
 bool DatabasePostgreSQL::empty() const
 {
     std::lock_guard<std::mutex> lock(mutex);

     auto connection_holder = pool->get();
-    auto tables_list = fetchPostgreSQLTablesList(connection_holder->get());
+    auto tables_list = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);

     for (const auto & table_name : tables_list)
         if (!detached_or_dropped.count(table_name))
@ -76,7 +94,7 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local

     Tables tables;
     auto connection_holder = pool->get();
-    auto table_names = fetchPostgreSQLTablesList(connection_holder->get());
+    auto table_names = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);

     for (const auto & table_name : table_names)
         if (!detached_or_dropped.count(table_name))
@ -104,8 +122,11 @@ bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const
         pqxx::result result = tx.exec(fmt::format(
             "SELECT '{}'::regclass, tablename "
             "FROM pg_catalog.pg_tables "
-            "WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema' "
-            "AND tablename = '{}'", table_name, table_name));
+            "WHERE schemaname != 'pg_catalog' AND {} "
+            "AND tablename = '{}'",
+            formatTableName(table_name),
+            (postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema)),
+            formatTableName(table_name)));
     }
     catch (pqxx::undefined_table const &)
     {
@ -151,14 +172,14 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr
         return StoragePtr{};

     auto connection_holder = pool->get();
-    auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), doubleQuoteString(table_name)).columns;
+    auto columns = fetchPostgreSQLTableStructure(connection_holder->get(), formatTableName(table_name)).columns;

     if (!columns)
         return StoragePtr{};

     auto storage = StoragePostgreSQL::create(
         StorageID(database_name, table_name), pool, table_name,
-        ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context);
+        ColumnsDescription{*columns}, ConstraintsDescription{}, String{}, local_context, postgres_schema);

     if (cache_tables)
         cached_tables[table_name] = storage;
@ -182,10 +203,14 @@ void DatabasePostgreSQL::attachTable(const String & table_name, const StoragePtr
     std::lock_guard<std::mutex> lock{mutex};

     if (!checkPostgresTable(table_name))
-        throw Exception(fmt::format("Cannot attach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
+        throw Exception(ErrorCodes::UNKNOWN_TABLE,
+                        "Cannot attach PostgreSQL table {} because it does not exist in PostgreSQL",
+                        getTableNameForLogs(table_name), database_name);

     if (!detached_or_dropped.count(table_name))
-        throw Exception(fmt::format("Cannot attach table {}.{}. It already exists", database_name, table_name), ErrorCodes::TABLE_ALREADY_EXISTS);
+        throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS,
+                        "Cannot attach PostgreSQL table {} because it already exists",
+                        getTableNameForLogs(table_name), database_name);

     if (cache_tables)
         cached_tables[table_name] = storage;
@ -203,10 +228,10 @@ StoragePtr DatabasePostgreSQL::detachTable(const String & table_name)
     std::lock_guard<std::mutex> lock{mutex};

     if (detached_or_dropped.count(table_name))
-        throw Exception(fmt::format("Cannot detach table {}.{}. It is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
+        throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot detach table {}. It is already dropped/detached", getTableNameForLogs(table_name));

     if (!checkPostgresTable(table_name))
-        throw Exception(fmt::format("Cannot detach table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
+        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot detach table {}, because it does not exist", getTableNameForLogs(table_name));

     if (cache_tables)
         cached_tables.erase(table_name);
@ -234,10 +259,10 @@ void DatabasePostgreSQL::dropTable(ContextPtr, const String & table_name, bool /
     std::lock_guard<std::mutex> lock{mutex};

     if (!checkPostgresTable(table_name))
-        throw Exception(fmt::format("Cannot drop table {}.{} because it does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
+        throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot drop table {} because it does not exist", getTableNameForLogs(table_name));

     if (detached_or_dropped.count(table_name))
-        throw Exception(fmt::format("Table {}.{} is already dropped/detached", database_name, table_name), ErrorCodes::TABLE_IS_DROPPED);
+        throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is already dropped/detached", getTableNameForLogs(table_name));

     fs::path mark_table_removed = fs::path(getMetadataPath()) / (escapeForFileName(table_name) + suffix);
     FS::createFile(mark_table_removed);
@ -281,7 +306,7 @@ void DatabasePostgreSQL::removeOutdatedTables()
 {
     std::lock_guard<std::mutex> lock{mutex};
     auto connection_holder = pool->get();
-    auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get());
+    auto actual_tables = fetchPostgreSQLTablesList(connection_holder->get(), postgres_schema);

     if (cache_tables)
     {
@ -334,7 +359,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
     if (!storage)
     {
         if (throw_on_error)
-            throw Exception(fmt::format("PostgreSQL table {}.{} does not exist", database_name, table_name), ErrorCodes::UNKNOWN_TABLE);
+            throw Exception(ErrorCodes::UNKNOWN_TABLE, "PostgreSQL table {} does not exist", getTableNameForLogs(table_name));

         return nullptr;
     }
@ -367,9 +392,9 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co
     ASTs storage_children = ast_storage->children;
     auto storage_engine_arguments = ast_storage->engine->arguments;

-    /// Remove extra engine argument (`use_table_cache`)
-    if (storage_engine_arguments->children.size() > 4)
-        storage_engine_arguments->children.resize(storage_engine_arguments->children.size() - 1);
+    /// Remove extra engine arguments (`schema` and `use_table_cache`)
+    if (storage_engine_arguments->children.size() >= 5)
+        storage_engine_arguments->children.resize(4);

     /// Add table_name to engine arguments
     assert(storage_engine_arguments->children.size() >= 2);
@ -32,7 +32,8 @@ public:
         const String & metadata_path_,
         const ASTStorage * database_engine_define,
         const String & dbname_,
-        const String & postgres_dbname,
+        const String & postgres_dbname_,
+        const String & postgres_schema_,
         postgres::PoolWithFailoverPtr pool_,
         bool cache_tables_);

@ -69,7 +70,8 @@ protected:
 private:
     String metadata_path;
     ASTPtr database_engine_define;
-    String dbname;
+    String postgres_dbname;
+    String postgres_schema;
     postgres::PoolWithFailoverPtr pool;
     const bool cache_tables;

@ -77,6 +79,10 @@ private:
     std::unordered_set<std::string> detached_or_dropped;
     BackgroundSchedulePool::TaskHolder cleaner_task;

+    String getTableNameForLogs(const String & table_name) const;
+
+    String formatTableName(const String & table_name) const;
+
     bool checkPostgresTable(const String & table_name) const;

     StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const;
@ -27,11 +27,12 @@ namespace ErrorCodes


 template<typename T>
-std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx)
+std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema)
 {
     std::unordered_set<std::string> tables;
-    std::string query = "SELECT tablename FROM pg_catalog.pg_tables "
-                        "WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'";
+    std::string query = fmt::format("SELECT tablename FROM pg_catalog.pg_tables "
+                        "WHERE schemaname != 'pg_catalog' AND {}",
+                        postgres_schema.empty() ? "schemaname != 'information_schema'" : "schemaname = " + quoteString(postgres_schema));

     for (auto table_name : tx.template stream<std::string>(query))
         tables.insert(std::get<0>(table_name));
@ -270,10 +271,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(pqxx::connection & connec
 }


-std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection)
+std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema)
 {
     pqxx::ReadTransaction tx(connection);
-    auto result = fetchPostgreSQLTablesList(tx);
+    auto result = fetchPostgreSQLTablesList(tx, postgres_schema);
     tx.commit();
     return result;
 }
@ -290,10 +291,10 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
     bool with_primary_key, bool with_replica_identity_index);

 template
-std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx);
+std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::work & tx, const String & postgres_schema);

 template
-std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx);
+std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::ReadTransaction & tx, const String & postgres_schema);

 }
@ -21,7 +21,7 @@ struct PostgreSQLTableStructure

 using PostgreSQLTableStructurePtr = std::unique_ptr<PostgreSQLTableStructure>;

-std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection);
+std::unordered_set<std::string> fetchPostgreSQLTablesList(pqxx::connection & connection, const String & postgres_schema);

 PostgreSQLTableStructure fetchPostgreSQLTableStructure(
     pqxx::connection & connection, const String & postgres_table_name, bool use_nulls = true);

@ -32,7 +32,7 @@ PostgreSQLTableStructure fetchPostgreSQLTableStructure(
     bool with_primary_key = false, bool with_replica_identity_index = false);

 template<typename T>
-std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx);
+std::unordered_set<std::string> fetchPostgreSQLTablesList(T & tx, const String & postgres_schema);

 }
@ -19,6 +19,7 @@ public:
     virtual ~ProxyConfiguration() = default;
     /// Returns proxy configuration on each HTTP request.
     virtual Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) = 0;
+    virtual void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) = 0;
 };

 }
@ -20,6 +20,7 @@ class ProxyListConfiguration : public ProxyConfiguration
 public:
     explicit ProxyListConfiguration(std::vector<Poco::URI> proxies_);
     Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
+    void errorReport(const Aws::Client::ClientConfigurationPerRequest &) override {}

 private:
     /// List of configured proxies.
@ -16,8 +16,10 @@ namespace DB::ErrorCodes

 namespace DB::S3
 {
-ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_)
-    : endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_)
+ProxyResolverConfiguration::ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_
+        , unsigned proxy_port_, unsigned cache_ttl_)
+    : endpoint(endpoint_), proxy_scheme(std::move(proxy_scheme_)), proxy_port(proxy_port_), cache_ttl(cache_ttl_)
 {
 }
@ -25,16 +27,25 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig
 {
     LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Obtain proxy using resolver: {}", endpoint.toString());

+    std::unique_lock lock(cache_mutex);
+
+    std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
+
+    if (cache_ttl.count() && cache_valid && now <= cache_timestamp + cache_ttl && now >= cache_timestamp)
+    {
+        LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use cached proxy: {}://{}:{}", Aws::Http::SchemeMapper::ToString(cached_config.proxyScheme), cached_config.proxyHost, cached_config.proxyPort);
+        return cached_config;
+    }
+
     /// 1 second is enough for now.
     /// TODO: Make timeouts configurable.
     ConnectionTimeouts timeouts(
         Poco::Timespan(1000000), /// Connection timeout.
         Poco::Timespan(1000000), /// Send timeout.
         Poco::Timespan(1000000)  /// Receive timeout.
     );
     auto session = makeHTTPSession(endpoint, timeouts);

     Aws::Client::ClientConfigurationPerRequest cfg;
     try
     {
         /// It should be just empty GET request.
@ -53,20 +64,41 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig

         LOG_DEBUG(&Poco::Logger::get("AWSClient"), "Use proxy: {}://{}:{}", proxy_scheme, proxy_host, proxy_port);

-        cfg.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
-        cfg.proxyHost = proxy_host;
-        cfg.proxyPort = proxy_port;
+        cached_config.proxyScheme = Aws::Http::SchemeMapper::FromString(proxy_scheme.c_str());
+        cached_config.proxyHost = proxy_host;
+        cached_config.proxyPort = proxy_port;
+        cache_timestamp = std::chrono::system_clock::now();
+        cache_valid = true;

-        return cfg;
+        return cached_config;
     }
     catch (...)
     {
         tryLogCurrentException("AWSClient", "Failed to obtain proxy");
         /// Don't use proxy if it can't be obtained.
         Aws::Client::ClientConfigurationPerRequest cfg;
         return cfg;
     }
 }

+void ProxyResolverConfiguration::errorReport(const Aws::Client::ClientConfigurationPerRequest & config)
+{
+    if (config.proxyHost.empty())
+        return;
+
+    std::unique_lock lock(cache_mutex);
+
+    if (!cache_ttl.count() || !cache_valid)
+        return;
+
+    if (cached_config.proxyScheme != config.proxyScheme || cached_config.proxyHost != config.proxyHost
+        || cached_config.proxyPort != config.proxyPort)
+        return;
+
+    /// Invalidate cached proxy when got error with this proxy
+    cache_valid = false;
+}
+
 }

 #endif
@ -8,6 +8,8 @@

 #include "ProxyConfiguration.h"

+#include <mutex>
+
 namespace DB::S3
 {
 /**
@ -18,8 +20,9 @@ namespace DB::S3
 class ProxyResolverConfiguration : public ProxyConfiguration
 {
 public:
-    ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_);
+    ProxyResolverConfiguration(const Poco::URI & endpoint_, String proxy_scheme_, unsigned proxy_port_, unsigned cache_ttl_);
     Aws::Client::ClientConfigurationPerRequest getConfiguration(const Aws::Http::HttpRequest & request) override;
+    void errorReport(const Aws::Client::ClientConfigurationPerRequest & config) override;

 private:
     /// Endpoint to obtain a proxy host.
@ -28,6 +31,12 @@ private:
     const String proxy_scheme;
     /// Port for obtained proxy.
     const unsigned proxy_port;
+
+    std::mutex cache_mutex;
+    bool cache_valid = false;
+    std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
+    const std::chrono::seconds cache_ttl{0};
+    Aws::Client::ClientConfigurationPerRequest cached_config;
 };

 }
@ -56,11 +56,12 @@ std::shared_ptr<S3::ProxyResolverConfiguration> getProxyResolverConfiguration(
    if (proxy_scheme != "http" && proxy_scheme != "https")
        throw Exception("Only HTTP/HTTPS schemas allowed in proxy resolver config: " + proxy_scheme, ErrorCodes::BAD_ARGUMENTS);
    auto proxy_port = proxy_resolver_config.getUInt(prefix + ".proxy_port");
    auto cache_ttl = proxy_resolver_config.getUInt(prefix + ".proxy_cache_time", 10);

    LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Configured proxy resolver: {}, Scheme: {}, Port: {}",
        endpoint.toString(), proxy_scheme, proxy_port);

    return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port);
    return std::make_shared<S3::ProxyResolverConfiguration>(endpoint, proxy_scheme, proxy_port, cache_ttl);
}

std::shared_ptr<S3::ProxyListConfiguration> getProxyListConfiguration(
@ -128,8 +129,12 @@ getClient(const Poco::Util::AbstractConfiguration & config, const String & confi

    auto proxy_config = getProxyConfiguration(config_prefix, config);
    if (proxy_config)
    {
        client_configuration.perRequestConfiguration
            = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
        client_configuration.error_report
            = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
    }

    client_configuration.retryStrategy
        = std::make_shared<Aws::Client::DefaultRetryStrategy>(config.getUInt(config_prefix + ".retry_attempts", 10));
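Note how `getClient()` stores `proxy_config` inside both callbacks: capturing the `shared_ptr` by value keeps the resolver alive for as long as the client configuration holds either lambda. A reduced sketch of that ownership pattern, with hypothetical names:

```cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>

/// Stand-in for the proxy resolver; the names are illustrative only.
struct Resolver
{
    std::string resolve() { return "http://proxy:8080"; }
    void reportError() { std::cout << "proxy failed\n"; }
};

int main()
{
    std::function<std::string()> per_request;
    std::function<void()> error_report;
    {
        auto resolver = std::make_shared<Resolver>();
        /// Capturing by value bumps the refcount in each lambda.
        per_request = [resolver] { return resolver->resolve(); };
        error_report = [resolver] { resolver->reportError(); };
    } /// the local shared_ptr is gone, but the lambdas still own the resolver
    std::cout << per_request() << '\n';
    error_report();
}
```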
@ -42,6 +42,8 @@ struct MultiSearchFirstIndexImpl
            }
            ++iteration;
        }
        if (iteration == 0)
            std::fill(res.begin(), res.end(), 0);
    }
};

@ -51,6 +51,8 @@ struct MultiSearchFirstPositionImpl
            }
            ++iteration;
        }
        if (iteration == 0)
            std::fill(res.begin(), res.end(), 0);
    }
};

@ -41,6 +41,8 @@ struct MultiSearchImpl
            }
            ++iteration;
        }
        if (iteration == 0)
            std::fill(res.begin(), res.end(), 0);
    }
};
@ -113,12 +113,34 @@ namespace MultiRegexps
        ScratchPtr scratch;
    };

    class RegexpsConstructor
    {
    public:
        RegexpsConstructor() = default;

        void setConstructor(std::function<Regexps()> constructor_) { constructor = std::move(constructor_); }

        Regexps * operator()()
        {
            std::unique_lock lock(mutex);
            if (regexp)
                return &*regexp;
            regexp = constructor();
            return &*regexp;
        }

    private:
        std::function<Regexps()> constructor;
        std::optional<Regexps> regexp;
        std::mutex mutex;
    };

    struct Pool
    {
        /// Mutex for finding in map.
        std::mutex mutex;
        /// Patterns + possible edit_distance to database and scratch.
        std::map<std::pair<std::vector<String>, std::optional<UInt32>>, Regexps> storage;
        std::map<std::pair<std::vector<String>, std::optional<UInt32>>, RegexpsConstructor> storage;
    };

    template <bool save_indices, bool CompileForEditDistance>
@ -250,15 +272,19 @@ namespace MultiRegexps

        /// If not found, compile and let other threads wait.
        if (known_regexps.storage.end() == it)
        {
            it = known_regexps.storage
                .emplace(
                    std::pair{str_patterns, edit_distance},
                    constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance))
                .emplace(std::piecewise_construct, std::make_tuple(std::move(str_patterns), edit_distance), std::make_tuple())
                .first;
            /// If found, unlock and return the database.
            lock.unlock();
            it->second.setConstructor([&str_patterns = it->first.first, edit_distance]()
            {
                return constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance);
            });
        }

        return &it->second;
        /// Unlock before possible construction.
        lock.unlock();
        return it->second();
    }
}
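The pattern-pool change swaps eager construction under the pool mutex for a lazily built slot with its own mutex, so threads compiling different pattern sets no longer serialize on one lock. A minimal sketch of the idea (simplified types, not the Hyperscan code):

```cpp
#include <functional>
#include <map>
#include <mutex>
#include <optional>
#include <string>

/// Per-slot lazy construction: the global mutex only guards map insertion;
/// the expensive build runs under the slot's own mutex, so different keys
/// can be constructed in parallel (the RegexpsConstructor idea above).
template <typename Value>
class LazySlot
{
public:
    void setConstructor(std::function<Value()> f) { constructor = std::move(f); }

    Value & get()
    {
        std::unique_lock lock(mutex);
        if (!value)
            value = constructor(); /// first caller builds, later callers wait here
        return *value;
    }

private:
    std::function<Value()> constructor;
    std::optional<Value> value;
    std::mutex mutex;
};

std::mutex pool_mutex;
std::map<std::string, LazySlot<std::string>> pool; /// map iterators stay valid across inserts

std::string & getOrBuild(const std::string & key)
{
    std::unique_lock lock(pool_mutex);
    auto [it, inserted] = pool.try_emplace(key);
    if (inserted)
        it->second.setConstructor([key] { return "compiled:" + key; });
    lock.unlock(); /// release the pool lock before the possibly slow build
    return it->second.get();
}
```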
@ -1,4 +1,5 @@
#include <Columns/ColumnArray.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeArray.h>
@ -7,6 +8,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include "Core/ColumnWithTypeAndName.h"
#include "DataTypes/DataTypeMap.h"
#include "DataTypes/IDataType.h"

namespace DB
@ -32,85 +34,211 @@ private:
    bool isVariadic() const override { return true; }
    bool useDefaultImplementationForConstants() const override { return true; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const
    {
        WhichDataType which_key(key_type);
        if (!(which_key.isInt() || which_key.isUInt()))
        {
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName());
        }

        if (max_key_type)
        {
            WhichDataType which_max_key(max_key_type);

            if (which_max_key.isNullable())
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Max key argument in arguments of function " + getName() + " can not be Nullable");

            if (key_type->getTypeId() != max_key_type->getTypeId())
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName());
        }
    }

    DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const
    {
        if (arguments.size() < 2)
            throw Exception{getName() + " accepts at least two arrays for key and value", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
            throw Exception(
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName());

        if (arguments.size() > 3)
            throw Exception{"too many arguments in " + getName() + " call", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());

        const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
        const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get());

        if (!key_array_type || !val_array_type)
            throw Exception{getName() + " accepts two arrays for key and value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName());

        DataTypePtr keys_type = key_array_type->getNestedType();
        WhichDataType which_key(keys_type);
        if (!(which_key.isNativeInt() || which_key.isNativeUInt()))
        {
            throw Exception(
                "Keys for " + getName() + " should be of native integer type (signed or unsigned)", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
        const auto & key_type = key_array_type->getNestedType();

        if (arguments.size() == 3)
        {
            DataTypePtr max_key_type = arguments[2];
            WhichDataType which_max_key(max_key_type);

            if (which_max_key.isNullable())
                throw Exception(
                    "Max key argument in arguments of function " + getName() + " can not be Nullable",
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

            if (keys_type->getTypeId() != max_key_type->getTypeId())
                throw Exception("Max key type in " + getName() + " should be same as keys type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
            this->checkTypes(key_type, arguments[2]);
        else
            this->checkTypes(key_type, nullptr);

        return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]});
    }

    template <typename KeyType, typename ValType>
    ColumnPtr execute2(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
    DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const
    {
        MutableColumnPtr res_tuple = res_type.createColumn();
        const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get());
        if (arguments.size() == 1)
            this->checkTypes(map->getKeyType(), nullptr);
        else if (arguments.size() == 2)
            this->checkTypes(map->getKeyType(), arguments[1]);
        else
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName());

        auto * to_tuple = assert_cast<ColumnTuple *>(res_tuple.get());
        auto & to_keys_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(0));
        auto & to_keys_data = to_keys_arr.getData();
        auto & to_keys_offsets = to_keys_arr.getOffsets();
        return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType());
    }

        auto & to_vals_arr = assert_cast<ColumnArray &>(to_tuple->getColumn(1));
        auto & to_values_data = to_vals_arr.getData();
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (arguments.empty())
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays");

        bool max_key_is_const = false, key_is_const = false, val_is_const = false;
        if (arguments[0]->getTypeId() == TypeIndex::Array)
            return getReturnTypeForTuple(arguments);
        else if (arguments[0]->getTypeId() == TypeIndex::Map)
            return getReturnTypeForMap(arguments);
        else
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Function {} only accepts one map or arrays, but got {}",
                getName(),
                arguments[0]->getName());
    }

        const auto * keys_array = checkAndGetColumn<ColumnArray>(key_column.get());
        if (!keys_array)
    // Struct holds input and output columns references,
    // Both arrays and maps have similar columns to work with but extracted differently
    template <typename KeyType, typename ValType>
    struct ColumnsInOut
    {
        // inputs
        const PaddedPODArray<KeyType> & in_keys_data;
        const PaddedPODArray<ValType> & in_vals_data;
        const IColumn::Offsets & in_key_offsets;
        const IColumn::Offsets & in_val_offsets;
        size_t row_count;
        bool key_is_const;
        bool val_is_const;

        // outputs
        PaddedPODArray<KeyType> & out_keys_data;
        PaddedPODArray<ValType> & out_vals_data;

        IColumn::Offsets & out_keys_offsets;
        // with map argument this field will not be used
        IColumn::Offsets * out_vals_offsets;
    };

    template <typename KeyType, typename ValType>
    ColumnsInOut<KeyType, ValType> getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
    {
        auto * out_tuple = assert_cast<ColumnTuple *>(res_column.get());
        auto & out_keys_array = assert_cast<ColumnArray &>(out_tuple->getColumn(0));
        auto & out_vals_array = assert_cast<ColumnArray &>(out_tuple->getColumn(1));

        const auto * key_column = arg_columns[0].get();
        const auto * in_keys_array = checkAndGetColumn<ColumnArray>(key_column);

        bool key_is_const = false, val_is_const = false;

        if (!in_keys_array)
        {
            const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column.get());
            const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(key_column);
            if (!const_array)
                throw Exception("Expected array column, found " + key_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
                throw Exception(
                    ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName());

            keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
            in_keys_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
            key_is_const = true;
        }

        const auto * values_array = checkAndGetColumn<ColumnArray>(val_column.get());
        if (!values_array)
        const auto * val_column = arg_columns[1].get();
        const auto * in_values_array = checkAndGetColumn<ColumnArray>(val_column);
        if (!in_values_array)
        {
            const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column.get());
            const ColumnConst * const_array = checkAndGetColumnConst<ColumnArray>(val_column);
            if (!const_array)
                throw Exception("Expected array column, found " + val_column->getName(), ErrorCodes::ILLEGAL_COLUMN);
                throw Exception(
                    ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName());

            values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
            in_values_array = checkAndGetColumn<ColumnArray>(const_array->getDataColumnPtr().get());
            val_is_const = true;
        }

        if (!keys_array || !values_array)
        if (!in_keys_array || !in_values_array)
            /* something went wrong */
            throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());

        const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_keys_array->getData()).getData();
        const auto & in_values_data = assert_cast<const ColumnVector<ValType> &>(in_values_array->getData()).getData();
        const auto & in_keys_offsets = in_keys_array->getOffsets();
        const auto & in_vals_offsets = in_values_array->getOffsets();

        auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_keys_array.getData()).getData();
        auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_vals_array.getData()).getData();
        auto & out_keys_offsets = out_keys_array.getOffsets();

        size_t row_count = key_is_const ? in_values_array->size() : in_keys_array->size();
        IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets();

        return {
            in_keys_data,
            in_values_data,
            in_keys_offsets,
            in_vals_offsets,
            row_count,
            key_is_const,
            val_is_const,
            out_keys_data,
            out_vals_data,
            out_keys_offsets,
            out_vals_offsets};
    }

    template <typename KeyType, typename ValType>
    ColumnsInOut<KeyType, ValType> getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const
    {
        const auto * in_map = assert_cast<const ColumnMap *>(arg_columns[0].get());
        const auto & in_nested_array = in_map->getNestedColumn();
        const auto & in_nested_tuple = in_map->getNestedData();
        const auto & in_keys_data = assert_cast<const ColumnVector<KeyType> &>(in_nested_tuple.getColumn(0)).getData();
        const auto & in_vals_data = assert_cast<const ColumnVector<ValType> &>(in_nested_tuple.getColumn(1)).getData();
        const auto & in_keys_offsets = in_nested_array.getOffsets();

        auto * out_map = assert_cast<ColumnMap *>(res_column.get());
        auto & out_nested_array = out_map->getNestedColumn();
        auto & out_nested_tuple = out_map->getNestedData();
        auto & out_keys_data = assert_cast<ColumnVector<KeyType> &>(out_nested_tuple.getColumn(0)).getData();
        auto & out_vals_data = assert_cast<ColumnVector<ValType> &>(out_nested_tuple.getColumn(1)).getData();
        auto & out_keys_offsets = out_nested_array.getOffsets();

        return {
            in_keys_data,
            in_vals_data,
            in_keys_offsets,
            in_keys_offsets,
            in_nested_array.size(),
            false,
            false,
            out_keys_data,
            out_vals_data,
            out_keys_offsets,
            nullptr};
    }

    template <typename KeyType, typename ValType>
    ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const
    {
        MutableColumnPtr res_column = res_type->createColumn();
        bool max_key_is_const = false;
        auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays<KeyType, ValType>(res_column, arg_columns)
                                                                     : getInOutDataFromMap<KeyType, ValType>(res_column, arg_columns);

        KeyType max_key_const{0};
@ -121,49 +249,43 @@ private:
            max_key_is_const = true;
        }

        auto & keys_data = assert_cast<const ColumnVector<KeyType> &>(keys_array->getData()).getData();
        auto & values_data = assert_cast<const ColumnVector<ValType> &>(values_array->getData()).getData();

        // Original offsets
        const IColumn::Offsets & key_offsets = keys_array->getOffsets();
        const IColumn::Offsets & val_offsets = values_array->getOffsets();

        IColumn::Offset offset{0};
        size_t row_count = key_is_const ? values_array->size() : keys_array->size();

        std::map<KeyType, ValType> res_map;

        // Iterate through the two arrays and fill result values.
        for (size_t row = 0; row < row_count; ++row)
        for (size_t row = 0; row < columns.row_count; ++row)
        {
            size_t key_offset = 0, val_offset = 0, array_size = key_offsets[0], val_array_size = val_offsets[0];
            size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0];

            res_map.clear();

            if (!key_is_const)
            if (!columns.key_is_const)
            {
                key_offset = row > 0 ? key_offsets[row - 1] : 0;
                array_size = key_offsets[row] - key_offset;
                key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0;
                items_count = columns.in_key_offsets[row] - key_offset;
            }

            if (!val_is_const)
            if (!columns.val_is_const)
            {
                val_offset = row > 0 ? val_offsets[row - 1] : 0;
                val_array_size = val_offsets[row] - val_offset;
                val_offset = row > 0 ? columns.in_val_offsets[row - 1] : 0;
                val_array_size = columns.in_val_offsets[row] - val_offset;
            }

            if (array_size != val_array_size)
                throw Exception("Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
            if (items_count != val_array_size)
                throw Exception(
                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                    "Key and value array should have same amount of elements in function {}",
                    getName());

            if (array_size == 0)
            if (items_count == 0)
            {
                to_keys_offsets.push_back(offset);
                columns.out_keys_offsets.push_back(offset);
                continue;
            }

            for (size_t i = 0; i < array_size; ++i)
            for (size_t i = 0; i < items_count; ++i)
            {
                res_map.insert({keys_data[key_offset + i], values_data[val_offset + i]});
                res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]});
            }

            auto min_key = res_map.begin()->first;
@ -184,7 +306,7 @@ private:
            /* no need to add anything, max key is less than first key */
            if (max_key < min_key)
            {
                to_keys_offsets.push_back(offset);
                columns.out_keys_offsets.push_back(offset);
                continue;
            }
        }
@ -197,16 +319,16 @@ private:
            KeyType key;
            for (key = min_key;; ++key)
            {
                to_keys_data.insert(key);
                columns.out_keys_data.push_back(key);

                auto it = res_map.find(key);
                if (it != res_map.end())
                {
                    to_values_data.insert(it->second);
                    columns.out_vals_data.push_back(it->second);
                }
                else
                {
                    to_values_data.insertDefault();
                    columns.out_vals_data.push_back(0);
                }

                ++offset;
@ -214,80 +336,112 @@ private:
                    break;
            }

            to_keys_offsets.push_back(offset);
            columns.out_keys_offsets.push_back(offset);
        }

        to_vals_arr.getOffsets().insert(to_keys_offsets.begin(), to_keys_offsets.end());
        return res_tuple;
        if (columns.out_vals_offsets)
            columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end());

        return res_column;
    }

    template <typename KeyType>
    ColumnPtr execute1(ColumnPtr key_column, ColumnPtr val_column, ColumnPtr max_key_column, const DataTypeTuple & res_type) const
    ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const
    {
        const auto & val_type = (assert_cast<const DataTypeArray *>(res_type.getElements()[1].get()))->getNestedType();
        switch (val_type->getTypeId())
        {
            case TypeIndex::Int8:
                return execute2<KeyType, Int8>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, Int8>(arg_columns, max_key_column, res_type);
            case TypeIndex::Int16:
                return execute2<KeyType, Int16>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, Int16>(arg_columns, max_key_column, res_type);
            case TypeIndex::Int32:
                return execute2<KeyType, Int32>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, Int32>(arg_columns, max_key_column, res_type);
            case TypeIndex::Int64:
                return execute2<KeyType, Int64>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, Int64>(arg_columns, max_key_column, res_type);
            case TypeIndex::Int128:
                return execute2<KeyType, Int128>(arg_columns, max_key_column, res_type);
            case TypeIndex::Int256:
                return execute2<KeyType, Int256>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt8:
                return execute2<KeyType, UInt8>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, UInt8>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt16:
                return execute2<KeyType, UInt16>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, UInt16>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt32:
                return execute2<KeyType, UInt32>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, UInt32>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt64:
                return execute2<KeyType, UInt64>(key_column, val_column, max_key_column, res_type);
                return execute2<KeyType, UInt64>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt128:
                return execute2<KeyType, UInt128>(arg_columns, max_key_column, res_type);
            case TypeIndex::UInt256:
                return execute2<KeyType, UInt256>(arg_columns, max_key_column, res_type);
            default:
                throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
        }
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
    {
        auto col1 = arguments[0];
        auto col2 = arguments[1];

        const auto * k = assert_cast<const DataTypeArray *>(col1.type.get());
        const auto * v = assert_cast<const DataTypeArray *>(col2.type.get());

        /* determine output type */
        const DataTypeTuple & res_type = DataTypeTuple(
            DataTypes{std::make_shared<DataTypeArray>(k->getNestedType()), std::make_shared<DataTypeArray>(v->getNestedType())});

        DataTypePtr res_type, key_type, val_type;
        ColumnPtr max_key_column = nullptr;
        ColumnPtr arg_columns[] = {arguments[0].column, nullptr};

        if (arguments.size() == 3)
        if (arguments[0].type->getTypeId() == TypeIndex::Array)
        {
            /* max key provided */
            max_key_column = arguments[2].column;
            key_type = assert_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
            val_type = assert_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();
            res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type});

            arg_columns[1] = arguments[1].column;
            if (arguments.size() == 3)
            {
                /* max key provided */
                max_key_column = arguments[2].column;
            }
        }
        else
        {
            assert(arguments[0].type->getTypeId() == TypeIndex::Map);

            const auto * map_type = assert_cast<const DataTypeMap *>(arguments[0].type.get());
            res_type = getReturnTypeImpl(DataTypes{arguments[0].type});
            key_type = map_type->getKeyType();
            val_type = map_type->getValueType();

            if (arguments.size() == 2)
            {
                /* max key provided */
                max_key_column = arguments[1].column;
            }
        }

        switch (k->getNestedType()->getTypeId())
        switch (key_type->getTypeId())
        {
            case TypeIndex::Int8:
                return execute1<Int8>(col1.column, col2.column, max_key_column, res_type);
                return execute1<Int8>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::Int16:
                return execute1<Int16>(col1.column, col2.column, max_key_column, res_type);
                return execute1<Int16>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::Int32:
                return execute1<Int32>(col1.column, col2.column, max_key_column, res_type);
                return execute1<Int32>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::Int64:
                return execute1<Int64>(col1.column, col2.column, max_key_column, res_type);
                return execute1<Int64>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::Int128:
                return execute1<Int128>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::Int256:
                return execute1<Int256>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt8:
                return execute1<UInt8>(col1.column, col2.column, max_key_column, res_type);
                return execute1<UInt8>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt16:
                return execute1<UInt16>(col1.column, col2.column, max_key_column, res_type);
                return execute1<UInt16>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt32:
                return execute1<UInt32>(col1.column, col2.column, max_key_column, res_type);
                return execute1<UInt32>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt64:
                return execute1<UInt64>(col1.column, col2.column, max_key_column, res_type);
                return execute1<UInt64>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt128:
                return execute1<UInt128>(arg_columns, max_key_column, res_type, val_type);
            case TypeIndex::UInt256:
                return execute1<UInt256>(arg_columns, max_key_column, res_type, val_type);
            default:
                throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN};
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName());
        }
    }
};
@ -296,5 +450,4 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory)
{
    factory.registerFunction<FunctionMapPopulateSeries>();
}

}
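Stripped of the column plumbing, `mapPopulateSeries` collects one row's key-value pairs into an ordered map and then emits a dense series from the smallest key up to the maximum (or the explicit max key), filling gaps with zeros. A rough sketch of that core loop, assuming an explicit max key is always given:

```cpp
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

/// Core of mapPopulateSeries for one row: keys [1, 2, 4], values [11, 22, 44]
/// with max_key = 4 become keys [1, 2, 3, 4] and values [11, 22, 0, 44].
std::pair<std::vector<int64_t>, std::vector<int64_t>>
populateSeries(const std::vector<int64_t> & keys, const std::vector<int64_t> & vals, int64_t max_key)
{
    std::map<int64_t, int64_t> row; /// ordered, so begin() is the minimum key
    for (size_t i = 0; i < keys.size(); ++i)
        row.emplace(keys[i], vals[i]);

    std::pair<std::vector<int64_t>, std::vector<int64_t>> res;
    if (row.empty() || max_key < row.begin()->first)
        return res; /// nothing to emit, as in the "max key is less than first key" branch

    for (int64_t key = row.begin()->first; key <= max_key; ++key)
    {
        auto it = row.find(key);
        res.first.push_back(key);
        res.second.push_back(it == row.end() ? 0 : it->second); /// fill gaps with zero
    }
    return res;
}
```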
@ -403,7 +403,6 @@ bool tryReadIntText(T & x, ReadBuffer & buf) // -V1071
  * Differs in following:
  * - for numbers starting with zero, parsed only zero;
  * - symbol '+' before number is not supported;
  * - symbols :;<=>? are parsed as some numbers.
  */
template <typename T, bool throw_on_error = true>
void readIntTextUnsafe(T & x, ReadBuffer & buf)
@ -437,15 +436,12 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf)

    while (!buf.eof())
    {
        /// This check is suddenly faster than
        ///  unsigned char c = *buf.position() - '0';
        ///  if (c < 10)
        /// for unknown reason on Xeon E5645.
        unsigned char value = *buf.position() - '0';

        if ((*buf.position() & 0xF0) == 0x30) /// It makes sense to have this condition inside loop.
        if (value < 10)
        {
            res *= 10;
            res += *buf.position() & 0x0F;
            res += value;
            ++buf.position();
        }
        else
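The replacement is the canonical unsigned-subtraction digit test: for a non-digit, `c - '0'` wraps around to a large unsigned value, so a single comparison suffices. The old mask `(c & 0xF0) == 0x30` also matched `:;<=>?` (0x3A-0x3F), which is exactly why the "symbols :;<=>? are parsed as some numbers" comment line could be deleted above. A small self-check:

```cpp
#include <cassert>

/// A character is a decimal digit iff the unsigned distance from '0' is < 10.
/// The previous mask test (c & 0xF0) == 0x30 also accepted ':',';','<','=','>','?'.
inline bool isDecimalDigit(char c)
{
    return static_cast<unsigned char>(c - '0') < 10;
}

int main()
{
    assert(isDecimalDigit('0') && isDecimalDigit('9'));
    assert(!isDecimalDigit(':') && !isDecimalDigit('/'));
    /// the old mask check would wrongly accept ':'
    assert((':' & 0xF0) == 0x30);
}
```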
@ -89,6 +89,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()

PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & clientConfiguration)
    : per_request_configuration(clientConfiguration.perRequestConfiguration)
    , error_report(clientConfiguration.error_report)
    , timeouts(ConnectionTimeouts(
        Poco::Timespan(clientConfiguration.connectTimeoutMs * 1000), /// connection timeout.
        Poco::Timespan(clientConfiguration.requestTimeoutMs * 1000), /// send timeout.
@ -296,6 +297,8 @@ void PocoHTTPClient::makeRequestInternal(
        else if (status_code >= 300)
        {
            ProfileEvents::increment(select_metric(S3MetricType::Errors));
            if (status_code >= 500 && error_report)
                error_report(request_configuration);
        }

        response->SetResponseBody(response_body_stream, session);

@ -37,6 +37,8 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration

    void updateSchemeAndRegion();

    std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;

private:
    PocoHTTPClientConfiguration(const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_);

@ -95,6 +97,7 @@ private:
        Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;

    std::function<Aws::Client::ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
    std::function<void(const Aws::Client::ClientConfigurationPerRequest &)> error_report;
    ConnectionTimeouts timeouts;
    const RemoteHostFilter & remote_host_filter;
    unsigned int s3_max_redirects;
@ -5,7 +5,7 @@ LIBRARY()

ADDINCL(
    contrib/libs/zstd/include
    contrib/restricted/fast_float
    contrib/restricted/fast_float/include
)

PEERDIR(

@ -4,7 +4,7 @@ LIBRARY()

ADDINCL(
    contrib/libs/zstd/include
    contrib/restricted/fast_float
    contrib/restricted/fast_float/include
)

PEERDIR(
@ -896,9 +896,9 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
    if (block_devices_rescan_delay.elapsedSeconds() >= 300)
        openBlockDevices();

    for (auto & [name, device] : block_devs)
    try
    {
        try
        for (auto & [name, device] : block_devs)
        {
            device->rewind();

@ -947,20 +947,20 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
                new_values["BlockQueueTimePerOp_" + name] = delta_values.time_in_queue * time_multiplier / delta_values.in_flight_ios;
            }
        }
    }
    catch (...)
    {
        /// Try to reopen block devices in case of error
        /// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
        try
        {
            openBlockDevices();
        }
        catch (...)
        {
            /// Try to reopen block devices in case of error
            /// (i.e. ENOENT means that some disk had been replaced, and it may appear with a new name)
            try
            {
                openBlockDevices();
            }
            catch (...)
            {
                tryLogCurrentException(__PRETTY_FUNCTION__);
            }
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }

    if (net_dev)
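The metrics change re-scopes the try block so it wraps the whole device loop: any failure aborts the scan and triggers one rebuild of the device list, instead of handling each device separately. A reduced sketch of the control flow, with stand-in helpers (names are illustrative):

```cpp
#include <iostream>
#include <stdexcept>
#include <vector>

/// Stand-ins for the real helpers.
void openBlockDevices() { std::cout << "rescanning /sys/block\n"; }
void readDevice(int id) { if (id < 0) throw std::runtime_error("ENOENT"); }

/// The whole per-device loop sits inside one try block, so a failure on
/// any device aborts this round and triggers a single device-list rebuild.
void updateBlockDeviceMetrics(const std::vector<int> & devices)
{
    try
    {
        for (int id : devices)
            readDevice(id);
    }
    catch (...)
    {
        /// e.g. ENOENT: a disk was replaced and may reappear under a new name
        try { openBlockDevices(); }
        catch (...) { /* log and carry on; metrics resume next round */ }
    }
}
```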
@ -2796,6 +2796,13 @@ ZooKeeperMetadataTransactionPtr Context::getZooKeeperMetadataTransaction() const
    return metadata_transaction;
}

void Context::resetZooKeeperMetadataTransaction()
{
    assert(metadata_transaction);
    assert(hasQueryContext());
    metadata_transaction = nullptr;
}

PartUUIDsPtr Context::getPartUUIDs() const
{
    auto lock = getLock();

@ -819,6 +819,8 @@ public:
    void initZooKeeperMetadataTransaction(ZooKeeperMetadataTransactionPtr txn, bool attach_existing = false);
    /// Returns context of current distributed DDL query or nullptr.
    ZooKeeperMetadataTransactionPtr getZooKeeperMetadataTransaction() const;
    /// Removes context of current distributed DDL.
    void resetZooKeeperMetadataTransaction();

    PartUUIDsPtr getPartUUIDs() const;
    PartUUIDsPtr getIgnoredPartUUIDs() const;
@ -22,6 +22,7 @@ namespace ErrorCodes
    extern const int UNKNOWN_FORMAT_VERSION;
    extern const int UNKNOWN_TYPE_OF_QUERY;
    extern const int INCONSISTENT_CLUSTER_DEFINITION;
    extern const int LOGICAL_ERROR;
}

HostID HostID::fromString(const String & host_port_str)
@ -362,7 +363,7 @@ ContextMutablePtr DatabaseReplicatedTask::makeQueryContext(ContextPtr from_conte
    query_context->getClientInfo().is_replicated_database_internal = true;
    query_context->setCurrentDatabase(database->getDatabaseName());

    auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query);
    auto txn = std::make_shared<ZooKeeperMetadataTransaction>(zookeeper, database->zookeeper_path, is_initial_query, entry_path);
    query_context->initZooKeeperMetadataTransaction(txn);

    if (is_initial_query)
@ -402,7 +403,8 @@ UInt32 DDLTaskBase::getLogEntryNumber(const String & log_entry_name)

void ZooKeeperMetadataTransaction::commit()
{
    assert(state == CREATED);
    if (state != CREATED)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state ({}), it's a bug", state);
    state = FAILED;
    current_zookeeper->multi(ops);
    state = COMMITTED;

@ -20,6 +20,11 @@ namespace fs = std::filesystem;
namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

class ASTQueryWithOnCluster;
using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
using ClusterPtr = std::shared_ptr<Cluster>;
@ -164,13 +169,15 @@ class ZooKeeperMetadataTransaction
    ZooKeeperPtr current_zookeeper;
    String zookeeper_path;
    bool is_initial_query;
    String task_path;
    Coordination::Requests ops;

public:
    ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_)
    ZooKeeperMetadataTransaction(const ZooKeeperPtr & current_zookeeper_, const String & zookeeper_path_, bool is_initial_query_, const String & task_path_)
        : current_zookeeper(current_zookeeper_)
        , zookeeper_path(zookeeper_path_)
        , is_initial_query(is_initial_query_)
        , task_path(task_path_)
    {
    }

@ -180,15 +187,21 @@ public:

    String getDatabaseZooKeeperPath() const { return zookeeper_path; }

    String getTaskZooKeeperPath() const { return task_path; }

    ZooKeeperPtr getZooKeeper() const { return current_zookeeper; }

    void addOp(Coordination::RequestPtr && op)
    {
        assert(!isExecuted());
        if (isExecuted())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
        ops.emplace_back(op);
    }

    void moveOpsTo(Coordination::Requests & other_ops)
    {
        assert(!isExecuted());
        if (isExecuted())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add ZooKeeper operation because query is executed. It's a bug.");
        std::move(ops.begin(), ops.end(), std::back_inserter(other_ops));
        ops.clear();
        state = COMMITTED;
@ -613,18 +613,6 @@ void makeWindowDescriptionFromAST(const Context & context,

void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
{
    // Convenient to check here because at least we have the Context.
    if (!syntax->window_function_asts.empty() &&
        !getContext()->getSettingsRef().allow_experimental_window_functions)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
            "The support for window functions is experimental and will change"
            " in backwards-incompatible ways in the future releases. Set"
            " allow_experimental_window_functions = 1 to enable it."
            " While processing '{}'",
            syntax->window_function_asts[0]->formatForErrorMessage());
    }

    // Window definitions from the WINDOW clause
    const auto * select_query = query->as<ASTSelectQuery>();
    if (select_query && select_query->window())
@ -8,6 +8,7 @@
#include <Common/Macros.h>
#include <Common/randomSeed.h>
#include <Common/renameat2.h>
#include <Common/hex.h>

#include <Core/Defines.h>
#include <Core/Settings.h>
@ -31,7 +32,9 @@

#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterCreateQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
@ -84,7 +87,6 @@ namespace ErrorCodes
    extern const int UNKNOWN_DATABASE;
    extern const int PATH_ACCESS_DENIED;
    extern const int NOT_IMPLEMENTED;
    extern const int UNKNOWN_TABLE;
}

namespace fs = std::filesystem;
@ -803,36 +805,6 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
        create.uuid = UUIDHelpers::Nil;
        create.to_inner_uuid = UUIDHelpers::Nil;
    }

    if (create.replace_table)
    {
        if (database->getUUID() == UUIDHelpers::Nil)
            throw Exception(ErrorCodes::INCORRECT_QUERY,
                            "{} query is supported only for Atomic databases",
                            create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");

        UUID uuid_of_table_to_replace;
        if (create.create_or_replace)
        {
            uuid_of_table_to_replace = getContext()->tryResolveStorageID(StorageID(create.database, create.table)).uuid;
            if (uuid_of_table_to_replace == UUIDHelpers::Nil)
            {
                /// Convert to usual CREATE
                create.replace_table = false;
                assert(!database->isTableExist(create.table, getContext()));
            }
            else
                create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
        }
        else
        {
            uuid_of_table_to_replace = getContext()->resolveStorageID(StorageID(create.database, create.table)).uuid;
            if (uuid_of_table_to_replace == UUIDHelpers::Nil)
                throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist",
                                backQuoteIfNeed(create.database), backQuoteIfNeed(create.table));
            create.table = "_tmp_replace_" + toString(uuid_of_table_to_replace);
        }
    }
}

@ -1110,23 +1082,72 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
                                                       const InterpreterCreateQuery::TableProperties & properties)
{
    /// Replicated database requires separate contexts for each DDL query
    ContextPtr current_context = getContext();
    ContextMutablePtr create_context = Context::createCopy(current_context);
    create_context->setQueryContext(std::const_pointer_cast<Context>(current_context));

    auto make_drop_context = [&](bool on_error) -> ContextMutablePtr
    {
        ContextMutablePtr drop_context = Context::createCopy(current_context);
        drop_context->makeQueryContext();
        if (on_error)
            return drop_context;

        if (auto txn = current_context->getZooKeeperMetadataTransaction())
        {
            /// Execute drop as a separate query, because a [CREATE OR] REPLACE query can be considered
            /// successfully executed after the RENAME/EXCHANGE query.
            drop_context->resetZooKeeperMetadataTransaction();
            auto drop_txn = std::make_shared<ZooKeeperMetadataTransaction>(txn->getZooKeeper(), txn->getDatabaseZooKeeperPath(),
                                                                           txn->isInitialQuery(), txn->getTaskZooKeeperPath());
            drop_context->initZooKeeperMetadataTransaction(drop_txn);
        }
        return drop_context;
    };

    auto ast_drop = std::make_shared<ASTDropQuery>();
    String table_to_replace_name = create.table;
    bool created = false;
    bool replaced = false;

    try
    {
        [[maybe_unused]] bool done = doCreateTable(create, properties);
        assert(done);
    auto database = DatabaseCatalog::instance().getDatabase(create.database);
    if (database->getUUID() == UUIDHelpers::Nil)
        throw Exception(ErrorCodes::INCORRECT_QUERY,
                        "{} query is supported only for Atomic databases",
                        create.create_or_replace ? "CREATE OR REPLACE TABLE" : "REPLACE TABLE");

    UInt64 name_hash = sipHash64(create.database + create.table);
    UInt16 random_suffix = thread_local_rng();
    if (auto txn = current_context->getZooKeeperMetadataTransaction())
    {
        /// Avoid different table names on database replicas
        random_suffix = sipHash64(txn->getTaskZooKeeperPath());
    }
    create.table = fmt::format("_tmp_replace_{}_{}",
                               getHexUIntLowercase(name_hash),
                               getHexUIntLowercase(random_suffix));

        ast_drop->table = create.table;
        ast_drop->is_dictionary = create.is_dictionary;
        ast_drop->database = create.database;
        ast_drop->kind = ASTDropQuery::Drop;
        created = true;
        if (!create.replace_table)
            return fillTableIfNeeded(create);
    }

    bool created = false;
    bool renamed = false;
    try
    {
        /// Create a temporary table (a random name will be generated)
        [[maybe_unused]] bool done = InterpreterCreateQuery(query_ptr, create_context).doCreateTable(create, properties);
        assert(done);
        created = true;

        /// Try to fill the temporary table
        BlockIO fill_io = fillTableIfNeeded(create);
        executeTrivialBlockIO(fill_io, getContext());

        /// Replace the target table with the created one
        auto ast_rename = std::make_shared<ASTRenameQuery>();
        ASTRenameQuery::Element elem
        {
@ -1135,22 +1156,44 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
        };

        ast_rename->elements.push_back(std::move(elem));
        ast_rename->exchange = true;
        ast_rename->dictionary = create.is_dictionary;
        if (create.create_or_replace)
        {
            /// CREATE OR REPLACE TABLE
            /// Will execute an ordinary RENAME instead of EXCHANGE if the target table does not exist
            ast_rename->rename_if_cannot_exchange = true;
            ast_rename->exchange = false;
        }
        else
        {
            /// REPLACE TABLE
            /// Will execute an EXCHANGE query and fail if the target table does not exist
            ast_rename->exchange = true;
        }

        InterpreterRenameQuery(ast_rename, getContext()).execute();
        replaced = true;
        InterpreterRenameQuery interpreter_rename{ast_rename, current_context};
        interpreter_rename.execute();
        renamed = true;

        InterpreterDropQuery(ast_drop, getContext()).execute();
        if (!interpreter_rename.renamedInsteadOfExchange())
        {
            /// The target table was replaced with a new one, drop the old table
            auto drop_context = make_drop_context(false);
            InterpreterDropQuery(ast_drop, drop_context).execute();
        }

        create.table = table_to_replace_name;

        return fillTableIfNeeded(create);
        return {};
    }
    catch (...)
    {
        if (created && create.replace_table && !replaced)
            InterpreterDropQuery(ast_drop, getContext()).execute();
        /// Drop the temporary table if it was successfully created but not renamed to the target name
        if (created && !renamed)
        {
            auto drop_context = make_drop_context(true);
            InterpreterDropQuery(ast_drop, drop_context).execute();
        }
        throw;
    }
}
@ -72,12 +72,27 @@ BlockIO InterpreterRenameQuery::execute()

BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards)
{
    assert(!rename.rename_if_cannot_exchange || descriptions.size() == 1);
    assert(!(rename.rename_if_cannot_exchange && rename.exchange));
    auto & database_catalog = DatabaseCatalog::instance();

    for (const auto & elem : descriptions)
    {
        if (!rename.exchange)
        bool exchange_tables;
        if (rename.exchange)
        {
            exchange_tables = true;
        }
        else if (rename.rename_if_cannot_exchange)
        {
            exchange_tables = database_catalog.isTableExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
            renamed_instead_of_exchange = !exchange_tables;
        }
        else
        {
            exchange_tables = false;
            database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), getContext());
        }

        DatabasePtr database = database_catalog.getDatabase(elem.from_database_name);
        if (typeid_cast<DatabaseReplicated *>(database.get())
@ -100,7 +115,7 @@ BlockIO InterpreterRenameQuery::executeToTables(const ASTRenameQuery & rename, c
                elem.from_table_name,
                *database_catalog.getDatabase(elem.to_database_name),
                elem.to_table_name,
                rename.exchange,
                exchange_tables,
                rename.dictionary);
        }
    }

@ -55,6 +55,8 @@ public:
    BlockIO execute() override;
    void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;

    bool renamedInsteadOfExchange() const { return renamed_instead_of_exchange; }

private:
    BlockIO executeToTables(const ASTRenameQuery & rename, const RenameDescriptions & descriptions, TableGuards & ddl_guards);
    static BlockIO executeToDatabase(const ASTRenameQuery & rename, const RenameDescriptions & descriptions);
@ -62,6 +64,7 @@ private:
    AccessRightsElements getRequiredAccess() const;

    ASTPtr query_ptr;
    bool renamed_instead_of_exchange{false};
};

}
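With `rename_if_cannot_exchange`, the per-table decision reduces to a small table: always swap for EXCHANGE, swap-or-rename for CREATE OR REPLACE depending on whether the target exists, and plain rename (target must not exist) otherwise. Sketched as a free function with a hypothetical name:

```cpp
#include <stdexcept>

/// Decision table implemented by executeToTables() above, simplified:
/// - plain EXCHANGE: always swap;
/// - rename_if_cannot_exchange (CREATE OR REPLACE): swap only if the target
///   exists, otherwise fall back to an ordinary rename and remember it;
/// - plain RENAME: the target must not exist.
bool chooseExchange(bool force_exchange, bool rename_if_cannot_exchange,
                    bool target_exists, bool & renamed_instead_of_exchange)
{
    if (force_exchange)
        return true;
    if (rename_if_cannot_exchange)
    {
        renamed_instead_of_exchange = !target_exists;
        return target_exists;
    }
    if (target_exists)
        throw std::runtime_error("target table already exists");
    return false;
}
```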
@ -1928,11 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
            }
        }

        /// If we don't have filtration, we can push down the limit to the reading stage for optimizations.
        UInt64 limit = (query.hasFiltration() || query.groupBy()) ? 0 : getLimitForSorting(query, context);
        if (query_info.projection)
            query_info.projection->input_order_info
                = query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context);
                = query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context, limit);
        else
            query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
            query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context, limit);
    }

    StreamLocalLimits limits;
@ -2290,8 +2292,14 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
{
    const Settings & settings = context->getSettingsRef();

    const auto & query = getSelectQuery();
    auto finish_sorting_step = std::make_unique<FinishSortingStep>(
        query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, settings.max_block_size, limit);
        query_plan.getCurrentDataStream(),
        input_sorting_info->order_key_prefix_descr,
        output_order_descr,
        settings.max_block_size,
        limit,
        query.hasFiltration());

    query_plan.addStep(std::move(finish_sorting_step));
}
@ -3,6 +3,7 @@
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/Context.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTSelectQuery.h>
@ -170,6 +171,24 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
            /// Don't go into query argument.
            return;
        }

        /// For lambda functions we need to avoid replacing lambda parameters with external aliases, for example,
        /// SELECT 1 AS x, arrayMap(x -> x + 2, [1, 2, 3])
        /// shouldn't be replaced with SELECT 1 AS x, arrayMap(x -> **(1 AS x)** + 2, [1, 2, 3])
        Aliases extracted_aliases;
        if (func_node->name == "lambda")
        {
            Names lambda_aliases = RequiredSourceColumnsMatcher::extractNamesFromLambda(*func_node);
            for (const auto & name : lambda_aliases)
            {
                auto it = data.aliases.find(name);
                if (it != data.aliases.end())
                {
                    extracted_aliases.insert(data.aliases.extract(it));
                }
            }
        }

        /// We skip the first argument. We also assume that the lambda function cannot have parameters.
        size_t first_pos = 0;
        if (func_node->name == "lambda")
@ -192,6 +211,11 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data)
        {
            visitChildren(func_node->window_definition.get(), data);
        }

        for (auto & it : extracted_aliases)
        {
            data.aliases.insert(it);
        }
    }
    else if (!node->as<ASTSelectQuery>())
    {

@ -39,7 +39,7 @@ public:
    using SetOfASTs = std::set<const IAST *>;
    using MapOfASTs = std::map<ASTPtr, ASTPtr>;

    const Aliases & aliases;
    Aliases & aliases;
    const NameSet & source_columns_set;
    ExtractedSettings settings;

@ -53,7 +53,7 @@ public:
    /// It's OK to have "c + 1 AS c" in queries, but not in a table definition
    const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column"

    Data(const Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
    Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_)
        : aliases(aliases_)
        , source_columns_set(source_columns_set_)
        , settings(settings_)
@ -74,6 +74,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int INTO_OUTFILE_NOT_ALLOWED;
|
||||
extern const int QUERY_WAS_CANCELLED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -262,7 +263,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
|
||||
elem.query = query_for_logging;
|
||||
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
|
||||
|
||||
// We don't calculate query_kind, databases, tables and columns when the query isn't able to start
|
||||
// Try log query_kind if ast is valid
|
||||
if (ast)
|
||||
elem.query_kind = ast->getQueryKindString();
|
||||
|
||||
// We don't calculate databases, tables and columns when the query isn't able to start
|
||||
|
||||
elem.exception_code = getCurrentExceptionCode();
|
||||
elem.exception = getCurrentExceptionMessage(false);
|
||||
@ -1007,22 +1012,31 @@ void executeQuery(
|
||||
const auto * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());
|
||||
|
||||
WriteBuffer * out_buf = &ostr;
|
||||
std::optional<WriteBufferFromFile> out_file_buf;
|
||||
std::unique_ptr<WriteBuffer> compressed_buffer;
|
||||
if (ast_query_with_output && ast_query_with_output->out_file)
|
||||
{
|
||||
if (!allow_into_outfile)
|
||||
throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);
|
||||
|
||||
const auto & out_file = ast_query_with_output->out_file->as<ASTLiteral &>().value.safeGet<std::string>();
|
||||
out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
|
||||
out_buf = &*out_file_buf;
|
||||
compressed_buffer = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
|
||||
chooseCompressionMethod(out_file, ""),
|
||||
/* compression level = */ 3
|
||||
);
|
||||
}
|
||||
|
||||
String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
|
||||
? getIdentifierName(ast_query_with_output->format)
|
||||
: context->getDefaultFormat();
|
||||
|
||||
auto out = FormatFactory::instance().getOutputStreamParallelIfPossible(format_name, *out_buf, streams.in->getHeader(), context, {}, output_format_settings);
|
||||
auto out = FormatFactory::instance().getOutputStreamParallelIfPossible(
|
||||
format_name,
|
||||
compressed_buffer ? *compressed_buffer : *out_buf,
|
||||
streams.in->getHeader(),
|
||||
context,
|
||||
{},
|
||||
output_format_settings);
|
||||
|
||||
/// Save previous progress callback if any. TODO Do it more conveniently.
|
||||
auto previous_progress_callback = context->getProgressCallback();
|
||||
@ -1046,15 +1060,18 @@ void executeQuery(
        const ASTQueryWithOutput * ast_query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get());

        WriteBuffer * out_buf = &ostr;
        std::optional<WriteBufferFromFile> out_file_buf;
        std::unique_ptr<WriteBuffer> compressed_buffer;
        if (ast_query_with_output && ast_query_with_output->out_file)
        {
            if (!allow_into_outfile)
                throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED);

            const auto & out_file = typeid_cast<const ASTLiteral &>(*ast_query_with_output->out_file).value.safeGet<std::string>();
            out_file_buf.emplace(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT);
            out_buf = &*out_file_buf;
            compressed_buffer = wrapWriteBufferWithCompressionMethod(
                std::make_unique<WriteBufferFromFile>(out_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_EXCL | O_CREAT),
                chooseCompressionMethod(out_file, ""),
                /* compression level = */ 3
            );
        }

        String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr)
@ -1068,7 +1085,14 @@ void executeQuery(
                return std::make_shared<MaterializingTransform>(header);
            });

        auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(format_name, *out_buf, pipeline.getHeader(), context, {}, output_format_settings);
        auto out = FormatFactory::instance().getOutputFormatParallelIfPossible(
            format_name,
            compressed_buffer ? *compressed_buffer : *out_buf,
            pipeline.getHeader(),
            context,
            {},
            output_format_settings);

        out->setAutoFlush();

        /// Save previous progress callback if any. TODO Do it more conveniently.
@ -1111,4 +1135,32 @@ void executeQuery(
        streams.onFinish();
}

void executeTrivialBlockIO(BlockIO & streams, ContextPtr context)
{
    try
    {
        if (streams.out)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires input, but no input buffer provided, it's a bug");
        if (streams.in)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query stream requires output, but no output buffer provided, it's a bug");

        if (!streams.pipeline.initialized())
            return;

        if (!streams.pipeline.isCompleted())
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Query pipeline requires output, but no output buffer provided, it's a bug");

        streams.pipeline.setProgressCallback(context->getProgressCallback());
        auto executor = streams.pipeline.execute();
        executor->execute(streams.pipeline.getNumThreads());
    }
    catch (...)
    {
        streams.onException();
        throw;
    }

    streams.onFinish();
}

}
@ -57,4 +57,8 @@ BlockIO executeQuery(
    bool allow_processors  /// If can use processors pipeline
);

/// Executes BlockIO returned from executeQuery(...)
/// if built pipeline does not require any input and does not produce any output.
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context);

}
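The declaration above documents the intended call pattern: run the BlockIO from executeQuery(...) when the built pipeline needs no input or output. A toy, self-contained analog of the exception-safe execute/onException/onFinish flow follows; `ToyBlockIO` and its members are invented for the demo, the real BlockIO and pipeline types live inside ClickHouse.

``` cpp
#include <functional>
#include <iostream>
#include <stdexcept>

// Stand-in for BlockIO: a completion flag plus the two lifecycle callbacks.
struct ToyBlockIO
{
    bool pipeline_completed = true;               // stands in for pipeline.isCompleted()
    std::function<void()> on_exception = [] {};   // stands in for onException()
    std::function<void()> on_finish = [] {};      // stands in for onFinish()
};

void executeTrivial(ToyBlockIO & io)
{
    try
    {
        if (!io.pipeline_completed)
            throw std::logic_error("pipeline requires output, but no output buffer provided");
        // ... executor->execute(num_threads) would run here ...
    }
    catch (...)
    {
        io.on_exception();  // report failure, then rethrow to the caller
        throw;
    }
    io.on_finish();  // reached only on success
}

int main()
{
    ToyBlockIO io;
    io.on_finish = [] { std::cout << "finished\n"; };
    executeTrivial(io);  // prints "finished"
}
```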
@ -225,6 +225,8 @@ public:
        return removeOnCluster<ASTAlterQuery>(clone(), new_database);
    }

    const char * getQueryKindString() const override { return "Alter"; }

protected:
    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
@ -102,6 +102,8 @@ public:

    bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view; }

    const char * getQueryKindString() const override { return "Create"; }

protected:
    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};
@ -45,6 +45,8 @@ public:
        return removeOnCluster<ASTDropQuery>(clone(), new_database);
    }

    const char * getQueryKindString() const override { return "Drop"; }

protected:
    void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
};
@ -34,5 +34,6 @@ public:
    void replaceEmptyDatabase(const String & current_database);
    void replaceCurrentUserTag(const String & current_user_name) const;
    ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster<ASTGrantQuery>(clone()); }
    const char * getQueryKindString() const override { return is_revoke ? "Revoke" : "Grant"; }
};
}
@ -47,6 +47,8 @@ public:
        return res;
    }

    const char * getQueryKindString() const override { return "Insert"; }

protected:
    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
};
@ -34,6 +34,9 @@ public:
    bool database{false};   /// For RENAME DATABASE
    bool dictionary{false}; /// For RENAME DICTIONARY

    /// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist.
    bool rename_if_cannot_exchange{false};

    /** Get the text that identifies this element. */
    String getID(char) const override { return "Rename"; }

@ -61,6 +64,8 @@ public:
        return query_ptr;
    }

    const char * getQueryKindString() const override { return "Rename"; }

protected:
    void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override
    {
@ -69,6 +69,8 @@ public:
    const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); }
    const ASTPtr settings() const { return getExpression(Expression::SETTINGS); }

    bool hasFiltration() const { return where() || prewhere() || having(); }

    /// Set/Reset/Remove expression.
    void setExpression(Expression expr, ASTPtr && ast);

@ -95,6 +97,8 @@ public:

    void setFinal();

    const char * getQueryKindString() const override { return "Select"; }

protected:
    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
@ -16,6 +16,8 @@ public:
    ASTPtr clone() const override;
    void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

    const char * getQueryKindString() const override { return "Select"; }

    enum class Mode
    {
        Unspecified,
@ -86,6 +86,8 @@ public:
        return removeOnCluster<ASTSystemQuery>(clone(), new_database);
    }

    const char * getQueryKindString() const override { return "System"; }

protected:

    void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
@ -231,6 +231,9 @@ public:

    void cloneChildren();

    // Returns the query_kind string representation of this AST query.
    virtual const char * getQueryKindString() const { return ""; }

public:
    /// For syntax highlighting.
    static const char * hilite_keyword;
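All of the "Alter"/"Create"/"Drop"/... overrides added in the header hunks above hang off this one virtual hook on IAST. A self-contained sketch of the pattern, with simplified stand-in types rather than the real AST hierarchy:

``` cpp
#include <iostream>

// Base-class hook returning an empty tag; each concrete query overrides it.
struct IASTLike
{
    virtual ~IASTLike() = default;
    virtual const char * getQueryKindString() const { return ""; }  // unknown kinds stay empty
};

struct SelectLike : IASTLike
{
    const char * getQueryKindString() const override { return "Select"; }
};

struct InsertLike : IASTLike
{
    const char * getQueryKindString() const override { return "Insert"; }
};

int main()
{
    const IASTLike * queries[] = { new SelectLike, new InsertLike, new IASTLike };
    for (const auto * q : queries)
        std::cout << '"' << q->getQueryKindString() << "\"\n";  // "Select", "Insert", ""
    for (const auto * q : queries)
        delete q;
}
```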
@ -16,7 +16,7 @@ public:
        const Block & header, size_t num_inputs,
        SortDescription description_, size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),

@ -20,7 +20,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        SortDescription description,
        size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            params,

@ -15,7 +15,7 @@ public:
        SortDescription description_, size_t max_block_size,
        Graphite::Params params_, time_t time_of_merge_)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),
@ -14,9 +14,11 @@ IMergingTransformBase::IMergingTransformBase(
    size_t num_inputs,
    const Block & input_header,
    const Block & output_header,
    bool have_all_inputs_)
    bool have_all_inputs_,
    bool has_limit_below_one_block_)
    : IProcessor(InputPorts(num_inputs, input_header), {output_header})
    , have_all_inputs(have_all_inputs_)
    , has_limit_below_one_block(has_limit_below_one_block_)
{
}

@ -64,10 +66,7 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
            continue;

        if (input_states[i].is_initialized)
        {
            // input.setNotNeeded();
            continue;
        }

        input.setNeeded();

@ -77,12 +76,17 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs()
            continue;
        }

        auto chunk = input.pull();
        /// setNotNeeded after reading the first chunk, because in the optimistic case
        /// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and small 'n')
        /// we won't have to read any chunks anymore.
        auto chunk = input.pull(has_limit_below_one_block);
        if (!chunk.hasRows())
        {
            if (!input.isFinished())
            {
                input.setNeeded();
                all_inputs_has_data = false;
            }

            continue;
        }
@ -16,7 +16,8 @@ public:
        size_t num_inputs,
        const Block & input_header,
        const Block & output_header,
        bool have_all_inputs_);
        bool have_all_inputs_,
        bool has_limit_below_one_block_);

    OutputPort & getOutputPort() { return outputs.front(); }

@ -66,6 +67,7 @@ private:
    std::vector<InputState> input_states;
    std::atomic<bool> have_all_inputs;
    bool is_initialized = false;
    bool has_limit_below_one_block = false;

    IProcessor::Status prepareInitializeInputs();
};
@ -81,8 +83,9 @@ public:
        const Block & input_header,
        const Block & output_header,
        bool have_all_inputs_,
        bool has_limit_below_one_block_,
        Args && ... args)
        : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_)
        : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, has_limit_below_one_block_)
        , algorithm(std::forward<Args>(args) ...)
    {
    }
@ -13,12 +13,13 @@ MergingSortedTransform::MergingSortedTransform(
    SortDescription description_,
    size_t max_block_size,
    UInt64 limit_,
    bool has_limit_below_one_block_,
    WriteBuffer * out_row_sources_buf_,
    bool quiet_,
    bool use_average_block_sizes,
    bool have_all_inputs_)
    : IMergingTransform(
        num_inputs, header, header, have_all_inputs_,
        num_inputs, header, header, have_all_inputs_, has_limit_below_one_block_,
        header,
        num_inputs,
        std::move(description_),
@ -17,6 +17,7 @@ public:
        SortDescription description,
        size_t max_block_size,
        UInt64 limit_ = 0,
        bool has_limit_below_one_block_ = false,
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool quiet_ = false,
        bool use_average_block_sizes = false,
@ -18,7 +18,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        const Names & partition_key_columns,
        size_t max_block_size)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),

@ -19,7 +19,7 @@ public:
        WriteBuffer * out_row_sources_buf_ = nullptr,
        bool use_average_block_sizes = false)
        : IMergingTransform(
            num_inputs, header, header, true,
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false,
            header,
            num_inputs,
            std::move(description_),
@ -31,12 +31,14 @@ FinishSortingStep::FinishSortingStep(
    SortDescription prefix_description_,
    SortDescription result_description_,
    size_t max_block_size_,
    UInt64 limit_)
    UInt64 limit_,
    bool has_filtration_)
    : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_))
    , prefix_description(std::move(prefix_description_))
    , result_description(std::move(result_description_))
    , max_block_size(max_block_size_)
    , limit(limit_)
    , has_filtration(has_filtration_)
{
    /// TODO: check input_stream is sorted by prefix_description.
    output_stream->sort_description = result_description;
@ -58,11 +60,14 @@ void FinishSortingStep::transformPipeline(QueryPipeline & pipeline, const BuildQ
    if (pipeline.getNumStreams() > 1)
    {
        UInt64 limit_for_merging = (need_finish_sorting ? 0 : limit);
        bool has_limit_below_one_block = !has_filtration && limit_for_merging && limit_for_merging < max_block_size;
        auto transform = std::make_shared<MergingSortedTransform>(
            pipeline.getHeader(),
            pipeline.getNumStreams(),
            prefix_description,
            max_block_size, limit_for_merging);
            max_block_size,
            limit_for_merging,
            has_limit_below_one_block);

        pipeline.addTransform(std::move(transform));
    }
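A worked example of the predicate introduced above, as I read the condition: a "limit below one block" applies only when the query has no WHERE/PREWHERE/HAVING filtration and its LIMIT fits inside a single block. The block size below is illustrative, not taken from the diff.

``` cpp
#include <cstdint>
#include <iostream>

// Mirrors: !has_filtration && limit_for_merging && limit_for_merging < max_block_size
bool hasLimitBelowOneBlock(bool has_filtration, uint64_t limit, uint64_t max_block_size)
{
    return !has_filtration && limit != 0 && limit < max_block_size;
}

int main()
{
    const uint64_t max_block_size = 65536;  // illustrative block size
    std::cout << hasLimitBelowOneBlock(false, 10, max_block_size) << '\n';      // 1: tiny LIMIT, no filter
    std::cout << hasLimitBelowOneBlock(true, 10, max_block_size) << '\n';       // 0: a filter may drop rows
    std::cout << hasLimitBelowOneBlock(false, 0, max_block_size) << '\n';       // 0: no LIMIT at all
    std::cout << hasLimitBelowOneBlock(false, 1000000, max_block_size) << '\n'; // 0: LIMIT spans many blocks
}
```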
@ -13,8 +13,9 @@ public:
        const DataStream & input_stream_,
        SortDescription prefix_description_,
        SortDescription result_description_,
        size_t max_block_size,
        UInt64 limit);
        size_t max_block_size_,
        UInt64 limit_,
        bool has_filtration_);

    String getName() const override { return "FinishSorting"; }

@ -31,6 +32,7 @@ private:
    SortDescription result_description;
    size_t max_block_size;
    UInt64 limit;
    bool has_filtration;
};

}
@ -13,7 +13,7 @@
#include <Processors/Merges/ReplacingSortedTransform.h>
#include <Processors/Merges/SummingSortedTransform.h>
#include <Processors/Merges/VersionedCollapsingTransform.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeInOrderSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeThreadSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
@ -179,26 +179,32 @@ template<typename TSource>
ProcessorPtr ReadFromMergeTree::createSource(
    const RangesInDataPart & part,
    const Names & required_columns,
    bool use_uncompressed_cache)
    bool use_uncompressed_cache,
    bool has_limit_below_one_block)
{
    return std::make_shared<TSource>(
        data, metadata_snapshot, part.data_part, max_block_size, preferred_block_size_bytes,
        preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache,
        prewhere_info, actions_settings, true, reader_settings, virt_column_names, part.part_index_in_query);
        preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info,
        actions_settings, true, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block);
}

Pipe ReadFromMergeTree::readInOrder(
    RangesInDataParts parts_with_range,
    Names required_columns,
    ReadType read_type,
    bool use_uncompressed_cache)
    bool use_uncompressed_cache,
    UInt64 limit)
{
    Pipes pipes;
    /// For reading in order it makes sense to read only
    /// one range per task to reduce the number of read rows.
    bool has_limit_below_one_block = read_type != ReadType::Default && limit && limit < max_block_size;

    for (const auto & part : parts_with_range)
    {
        auto source = read_type == ReadType::InReverseOrder
            ? createSource<MergeTreeReverseSelectProcessor>(part, required_columns, use_uncompressed_cache)
            : createSource<MergeTreeSelectProcessor>(part, required_columns, use_uncompressed_cache)
            ? createSource<MergeTreeReverseSelectProcessor>(part, required_columns, use_uncompressed_cache, has_limit_below_one_block)
            : createSource<MergeTreeInOrderSelectProcessor>(part, required_columns, use_uncompressed_cache, has_limit_below_one_block);

        pipes.emplace_back(std::move(source));
    }
@ -224,7 +230,7 @@ Pipe ReadFromMergeTree::read(
        return readFromPool(parts_with_range, required_columns, max_streams,
            min_marks_for_concurrent_read, use_uncompressed_cache);

    auto pipe = readInOrder(parts_with_range, required_columns, read_type, use_uncompressed_cache);
    auto pipe = readInOrder(parts_with_range, required_columns, read_type, use_uncompressed_cache, 0);

    /// Use ConcatProcessor to concat sources together.
    /// It is needed to read in parts order (and so in PK order) if a single thread is used.
@ -403,7 +409,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
    {
        RangesInDataPart part = parts_with_ranges.back();
        parts_with_ranges.pop_back();

        size_t & marks_in_part = info.sum_marks_in_parts.back();

        /// We will not take too few rows from a part.
@ -418,8 +423,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(

        MarkRanges ranges_to_get_from_part;

        /// We take full part if it contains enough marks or
        /// if we know limit and part contains less than 'limit' rows.
        bool take_full_part = marks_in_part <= need_marks
            || (input_order_info->limit && input_order_info->limit < part.getRowsCount());

        /// We take the whole part if it is small enough.
        if (marks_in_part <= need_marks)
        if (take_full_part)
        {
            ranges_to_get_from_part = part.ranges;
@ -449,6 +459,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
            }
            parts_with_ranges.emplace_back(part);
        }

        ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction);
        new_parts.emplace_back(part.data_part, part.part_index_in_query, std::move(ranges_to_get_from_part));
    }
@ -457,8 +468,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
            ? ReadFromMergeTree::ReadType::InOrder
            : ReadFromMergeTree::ReadType::InReverseOrder;

        pipes.emplace_back(read(std::move(new_parts), column_names, read_type,
            requested_num_streams, info.min_marks_for_concurrent_read, info.use_uncompressed_cache));
        pipes.emplace_back(readInOrder(std::move(new_parts), column_names, read_type,
            info.use_uncompressed_cache, input_order_info->limit));
    }

    if (need_preliminary_merge)
@ -486,7 +497,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
            pipe.getHeader(),
            pipe.numOutputPorts(),
            sort_description,
            max_block_size);
            max_block_size,
            0, true);

        pipe.addTransform(std::move(transform));
    }
@ -116,10 +116,10 @@ private:

    Pipe read(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache);
    Pipe readFromPool(RangesInDataParts parts_with_ranges, Names required_columns, size_t max_streams, size_t min_marks_for_concurrent_read, bool use_uncompressed_cache);
    Pipe readInOrder(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, bool use_uncompressed_cache);
    Pipe readInOrder(RangesInDataParts parts_with_range, Names required_columns, ReadType read_type, bool use_uncompressed_cache, UInt64 limit);

    template<typename TSource>
    ProcessorPtr createSource(const RangesInDataPart & part, const Names & required_columns, bool use_uncompressed_cache);
    ProcessorPtr createSource(const RangesInDataPart & part, const Names & required_columns, bool use_uncompressed_cache, bool has_limit_below_one_block);

    Pipe spreadMarkRangesAmongStreams(
        RangesInDataParts && parts_with_ranges,
@ -49,7 +49,7 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)

void SourceWithProgress::work()
{
    if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
    if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
    {
        cancel();
    }

@ -32,7 +32,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
        info.started = true;
    }

    if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode))
    if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode))
    {
        stopReading();
        return;
@ -200,6 +200,7 @@ void MergeSortingTransform::consume(Chunk chunk)
                description,
                max_merged_block_size,
                limit,
                false,
                nullptr,
                quiet,
                use_average_block_sizes,
@ -712,7 +712,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk(
    MergeTreeData::DataPart::Checksums & checksums,
    ThrottlerPtr throttler)
{
    static const String TMP_PREFIX = "tmp_fetch_";
    static const String TMP_PREFIX = "tmp-fetch_";
    String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_;

    /// We will remove the directory if it already exists. Make precautions.
@ -784,7 +784,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta(
    LOG_DEBUG(log, "Downloading Part {} unique id {} metadata onto disk {}.",
        part_name, part_id, disk->getName());

    static const String TMP_PREFIX = "tmp_fetch_";
    static const String TMP_PREFIX = "tmp-fetch_";
    String tmp_prefix = tmp_prefix_.empty() ? TMP_PREFIX : tmp_prefix_;

    String part_relative_path = String(to_detached ? "detached/" : "") + tmp_prefix + part_name;
@ -1322,6 +1322,9 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix)
{
    /// Do not allow underscores in the prefix because they are used as separators.
    assert(prefix.find_first_of('_') == String::npos);
    assert(prefix.empty() || std::find(DetachedPartInfo::DETACH_REASONS.begin(),
                                       DetachedPartInfo::DETACH_REASONS.end(),
                                       prefix) != DetachedPartInfo::DETACH_REASONS.end());
    return "detached/" + getRelativePathForPrefix(prefix);
}
@ -465,6 +465,19 @@ Block MergeTreeBaseSelectProcessor::transformHeader(
    return block;
}

std::unique_ptr<MergeTreeBlockSizePredictor> MergeTreeBaseSelectProcessor::getSizePredictor(
    const MergeTreeData::DataPartPtr & data_part,
    const MergeTreeReadTaskColumns & task_columns,
    const Block & sample_block)
{
    const auto & required_column_names = task_columns.columns.getNames();
    const auto & required_pre_column_names = task_columns.pre_columns.getNames();
    NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
    complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());

    return std::make_unique<MergeTreeBlockSizePredictor>(
        data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block);
}

MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default;
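The new `getSizePredictor()` above deduplicates the WHERE and PREWHERE column lists, which may overlap, by merging them through a set before building the predictor. A self-contained sketch of that union step; the column names here are made up for the demo:

``` cpp
#include <iostream>
#include <set>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> columns = {"id", "value", "ts"};
    std::vector<std::string> pre_columns = {"id", "flag"};  // PREWHERE may reuse "id"

    // Same pattern as NameSet complete_column_names(...) + insert(...) above.
    std::set<std::string> complete(columns.begin(), columns.end());
    complete.insert(pre_columns.begin(), pre_columns.end());

    for (const auto & name : complete)
        std::cout << name << '\n';  // each name exactly once (std::set also sorts them)
}
```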
@ -37,6 +37,11 @@ public:
    static Block transformHeader(
        Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns);

    static std::unique_ptr<MergeTreeBlockSizePredictor> getSizePredictor(
        const MergeTreeData::DataPartPtr & data_part,
        const MergeTreeReadTaskColumns & task_columns,
        const Block & sample_block);

protected:
    Chunk generate() final;
@ -70,7 +70,7 @@ struct MergeTreeReadTaskColumns
    /// column names to read during PREWHERE
    NamesAndTypesList pre_columns;
    /// resulting block may require reordering in accordance with `ordered_names`
    bool should_reorder;
    bool should_reorder = false;
};

MergeTreeReadTaskColumns getReadTaskColumns(
@ -894,7 +894,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
    {
        case MergeTreeData::MergingParams::Ordinary:
            merged_transform = std::make_unique<MergingSortedTransform>(
                header, pipes.size(), sort_description, merge_block_size, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size);
                header, pipes.size(), sort_description, merge_block_size, 0, false, rows_sources_write_buf.get(), true, blocks_are_granules_size);
            break;

        case MergeTreeData::MergingParams::Collapsing:
src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp (new file, 54 lines)
@ -0,0 +1,54 @@
#include <Storages/MergeTree/MergeTreeInOrderSelectProcessor.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int MEMORY_LIMIT_EXCEEDED;
}

bool MergeTreeInOrderSelectProcessor::getNewTask()
try
{
    if (all_mark_ranges.empty())
    {
        finish();
        return false;
    }

    if (!reader)
        initializeReaders();

    MarkRanges mark_ranges_for_task;
    /// If we need to read only a few rows, set one range per task to reduce the amount of data read.
    if (has_limit_below_one_block)
    {
        mark_ranges_for_task = { std::move(all_mark_ranges.front()) };
        all_mark_ranges.pop_front();
    }
    else
    {
        mark_ranges_for_task = std::move(all_mark_ranges);
        all_mark_ranges.clear();
    }

    auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
        : getSizePredictor(data_part, task_columns, sample_block);

    task = std::make_unique<MergeTreeReadTask>(
        data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, task_columns.columns,
        task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column,
        task_columns.should_reorder, std::move(size_predictor));

    return true;
}
catch (...)
{
    /// Suspicion of a broken part. The part is added to the queue for verification.
    if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
        storage.reportBrokenPart(data_part->name);
    throw;
}

}
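The task-splitting policy in `getNewTask()` above is the core of the new processor: with a small LIMIT, each task takes one mark range so reading can stop early; otherwise a single task takes every range at once. A self-contained sketch using a plain pair as a stand-in for MarkRange:

``` cpp
#include <deque>
#include <iostream>
#include <utility>

using Range = std::pair<size_t, size_t>;  // stand-in for MarkRange {begin, end}

std::deque<Range> nextTaskRanges(std::deque<Range> & all_ranges, bool has_limit_below_one_block)
{
    std::deque<Range> task_ranges;
    if (has_limit_below_one_block)
    {
        // One range per task: later ranges may never need to be read.
        task_ranges.push_back(std::move(all_ranges.front()));
        all_ranges.pop_front();
    }
    else
    {
        // No small limit: one task reads everything.
        task_ranges = std::move(all_ranges);
        all_ranges.clear();
    }
    return task_ranges;
}

int main()
{
    std::deque<Range> ranges = {{0, 8}, {8, 16}, {16, 24}};
    auto first_task = nextTaskRanges(ranges, /*has_limit_below_one_block=*/ true);
    std::cout << first_task.size() << " range in first task, "
              << ranges.size() << " left for later tasks\n";  // 1 range, 2 left
}
```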
src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h (new file, 31 lines)
@ -0,0 +1,31 @@
#pragma once
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>

namespace DB
{

/// Used to read data from a single part with a select query in order of the primary key.
/// Cares about PREWHERE, virtual columns, indexes, etc.
/// To read data from multiple parts, Storage (MergeTree) creates multiple such objects.
class MergeTreeInOrderSelectProcessor final : public MergeTreeSelectProcessor
{
public:
    template <typename... Args>
    MergeTreeInOrderSelectProcessor(Args &&... args)
        : MergeTreeSelectProcessor{std::forward<Args>(args)...}
    {
        LOG_DEBUG(log, "Reading {} ranges in order from part {}, approx. {} rows starting from {}",
            all_mark_ranges.size(), data_part->name, total_rows,
            data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
    }

    String getName() const override { return "MergeTreeInOrder"; }

private:
    bool getNewTask() override;

    Poco::Logger * log = &Poco::Logger::get("MergeTreeInOrderSelectProcessor");
};

}
@ -39,8 +39,25 @@ void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const
    {
        const DataTypePtr & type = index_sample_block.getByPosition(i).type;
        auto serialization = type->getDefaultSerialization();
        serialization->serializeBinary(hyperrectangle[i].left, ostr);
        serialization->serializeBinary(hyperrectangle[i].right, ostr);

        if (!type->isNullable())
        {
            serialization->serializeBinary(hyperrectangle[i].left, ostr);
            serialization->serializeBinary(hyperrectangle[i].right, ostr);
        }
        else
        {
            /// NOTE: this serialization differs from
            /// IMergeTreeDataPart::MinMaxIndex::store() to preserve
            /// backward compatibility.
            bool is_null = hyperrectangle[i].left.isNull() || hyperrectangle[i].right.isNull(); // one is enough
            writeBinary(is_null, ostr);
            if (!is_null)
            {
                serialization->serializeBinary(hyperrectangle[i].left, ostr);
                serialization->serializeBinary(hyperrectangle[i].right, ostr);
            }
        }
    }
}
@ -54,14 +71,30 @@ void MergeTreeIndexGranuleMinMax::deserializeBinary(ReadBuffer & istr)
    {
        const DataTypePtr & type = index_sample_block.getByPosition(i).type;
        auto serialization = type->getDefaultSerialization();
        serialization->deserializeBinary(min_val, istr);
        serialization->deserializeBinary(max_val, istr);

        // NULL_LAST
        if (min_val.isNull())
            min_val = PositiveInfinity();
        if (max_val.isNull())
            max_val = PositiveInfinity();
        if (!type->isNullable())
        {
            serialization->deserializeBinary(min_val, istr);
            serialization->deserializeBinary(max_val, istr);
        }
        else
        {
            /// NOTE: this serialization differs from
            /// IMergeTreeDataPart::MinMaxIndex::load() to preserve
            /// backward compatibility.
            bool is_null;
            readBinary(is_null, istr);
            if (!is_null)
            {
                serialization->deserializeBinary(min_val, istr);
                serialization->deserializeBinary(max_val, istr);
            }
            else
            {
                min_val = Null();
                max_val = Null();
            }
        }
        hyperrectangle.emplace_back(min_val, true, max_val, true);
    }
}
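The two hunks above define a flag-prefixed wire format for nullable columns: a boolean first, then the min/max values only when the flag says they exist. A self-contained sketch of that format, with types simplified to int and a text stream standing in for the binary buffers (ClickHouse serializes Field values into a WriteBuffer):

``` cpp
#include <iostream>
#include <optional>
#include <sstream>

void store(std::ostream & out, std::optional<int> min_val, std::optional<int> max_val)
{
    bool is_null = !min_val.has_value() || !max_val.has_value();  // one is enough
    out << is_null << ' ';
    if (!is_null)
        out << *min_val << ' ' << *max_val << ' ';  // values follow only when present
}

void load(std::istream & in, std::optional<int> & min_val, std::optional<int> & max_val)
{
    bool is_null;
    in >> is_null;  // read the flag first, mirroring readBinary(is_null, istr)
    if (is_null)
    {
        min_val.reset();
        max_val.reset();
    }
    else
    {
        int lo, hi;
        in >> lo >> hi;
        min_val = lo;
        max_val = hi;
    }
}

int main()
{
    std::stringstream buf;
    store(buf, 3, 42);
    store(buf, std::nullopt, std::nullopt);

    std::optional<int> lo, hi;
    load(buf, lo, hi);
    std::cout << *lo << ' ' << *hi << '\n';  // 3 42
    load(buf, lo, hi);
    std::cout << lo.has_value() << '\n';     // 0
}
```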
@ -247,13 +247,39 @@ String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) con
    return wb.str();
}

const std::vector<String> DetachedPartInfo::DETACH_REASONS =
{
    "broken",
    "unexpected",
    "noquorum",
    "ignored",
    "broken-on-start",
    "clone",
    "attaching",
    "deleting",
    "tmp-fetch",
};

bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info,
                                                MergeTreeDataFormatVersion format_version)
{
    part_info.dir_name = dir_name;

    /// First, try to parse as <part_name>.
    // TODO what if tryParsePartName will parse the prefix as partition_id? It can happen if dir_name doesn't contain a mutation number at the end.
    /// First, try to find a known prefix and parse dir_name as <prefix>_<partname>.
    /// Arbitrary strings are not allowed for partition_id, so known_prefix cannot be confused with partition_id.
    for (const auto & known_prefix : DETACH_REASONS)
    {
        if (dir_name.starts_with(known_prefix) && known_prefix.size() < dir_name.size() && dir_name[known_prefix.size()] == '_')
        {
            part_info.prefix = known_prefix;
            String part_name = dir_name.substr(known_prefix.size() + 1);
            bool parsed = MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version);
            return part_info.valid_name = parsed;
        }
    }

    /// Next, try to parse dir_name as <part_name>.
    if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version))
        return part_info.valid_name = true;
@ -263,7 +289,6 @@ bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, Detache
    if (first_separator == String::npos)
        return part_info.valid_name = false;

    // TODO what if <prefix> contains '_'?
    const auto part_name = dir_name.substr(first_separator + 1,
                                           dir_name.size() - first_separator - 1);
    if (!MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version))
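The parsing above also explains the `tmp_fetch_` to `tmp-fetch_` rename earlier in this diff: '_' is the separator between the detach reason and the part name, so the reason itself must use '-'. A self-contained sketch of the prefix scan, with the reason list copied from the hunk above and a made-up helper name:

``` cpp
#include <iostream>
#include <string>
#include <vector>

static const std::vector<std::string> kDetachReasons =
    {"broken", "unexpected", "noquorum", "ignored", "broken-on-start",
     "clone", "attaching", "deleting", "tmp-fetch"};

// Split "<prefix>_<part_name>" using the known-reason list; returns false
// when no known prefix matches (the whole name may then be a bare part name).
bool splitDetachedName(const std::string & dir_name, std::string & prefix, std::string & part_name)
{
    for (const auto & known_prefix : kDetachReasons)
    {
        if (dir_name.size() > known_prefix.size()
            && dir_name.compare(0, known_prefix.size(), known_prefix) == 0
            && dir_name[known_prefix.size()] == '_')
        {
            prefix = known_prefix;
            part_name = dir_name.substr(known_prefix.size() + 1);
            return true;
        }
    }
    prefix.clear();
    part_name = dir_name;
    return false;
}

int main()
{
    std::string prefix, part_name;
    splitDetachedName("tmp-fetch_all_1_1_0", prefix, part_name);
    std::cout << prefix << " | " << part_name << '\n';  // tmp-fetch | all_1_1_0
}
```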
@ -2,6 +2,7 @@

#include <limits>
#include <tuple>
#include <vector>
#include <common/types.h>
#include <common/DayNum.h>
#include <Storages/MergeTree/MergeTreeDataFormatVersion.h>
@ -115,6 +116,10 @@ struct DetachedPartInfo : public MergeTreePartInfo
    /// If false, MergeTreePartInfo is in an invalid state (the directory name was not successfully parsed).
    bool valid_name;

    static const std::vector<String> DETACH_REASONS;

    /// NOTE: It may parse the part info incorrectly.
    /// For example, if the prefix contains '_' or if DETACH_REASONS doesn't contain the prefix.
    static bool tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info, MergeTreeDataFormatVersion format_version);
};
@ -228,29 +228,20 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(

        per_part_sum_marks.push_back(sum_marks);

        auto [required_columns, required_pre_columns, should_reorder] =
            getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns);
        auto task_columns = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns);

        if (predict_block_size_bytes)
        {
            const auto & required_column_names = required_columns.getNames();
            const auto & required_pre_column_names = required_pre_columns.getNames();
            NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
            complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());
        auto size_predictor = !predict_block_size_bytes ? nullptr
            : MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, sample_block);

            per_part_size_predictor.emplace_back(std::make_unique<MergeTreeBlockSizePredictor>(
                part.data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block));
        }
        else
            per_part_size_predictor.emplace_back(nullptr);
        per_part_size_predictor.emplace_back(std::move(size_predictor));

        /// will be used to distinguish between PREWHERE and WHERE columns when applying filter
        const auto & required_column_names = required_columns.getNames();
        const auto & required_column_names = task_columns.columns.getNames();
        per_part_column_name_set.emplace_back(required_column_names.begin(), required_column_names.end());

        per_part_pre_columns.push_back(std::move(required_pre_columns));
        per_part_columns.push_back(std::move(required_columns));
        per_part_should_reorder.push_back(should_reorder);
        per_part_pre_columns.push_back(std::move(task_columns.pre_columns));
        per_part_columns.push_back(std::move(task_columns.columns));
        per_part_should_reorder.push_back(task_columns.should_reorder);

        parts_with_idx.push_back({ part.data_part, part.part_index_in_query });
    }
@ -1,8 +1,4 @@
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
#include <Storages/MergeTree/IMergeTreeReader.h>
#include <Interpreters/Context.h>

namespace DB
{
@ -12,74 +8,10 @@ namespace ErrorCodes
    extern const int MEMORY_LIMIT_EXCEEDED;
}

MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
    const MergeTreeData & storage_,
    const StorageMetadataPtr & metadata_snapshot_,
    const MergeTreeData::DataPartPtr & owned_data_part_,
    UInt64 max_block_size_rows_,
    size_t preferred_block_size_bytes_,
    size_t preferred_max_column_in_block_size_bytes_,
    Names required_columns_,
    MarkRanges mark_ranges_,
    bool use_uncompressed_cache_,
    const PrewhereInfoPtr & prewhere_info_,
    ExpressionActionsSettings actions_settings,
    bool check_columns,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & virt_column_names_,
    size_t part_index_in_query_,
    bool quiet)
    :
    MergeTreeBaseSelectProcessor{
        metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()),
        storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_,
        preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
        reader_settings_, use_uncompressed_cache_, virt_column_names_},
    required_columns{std::move(required_columns_)},
    data_part{owned_data_part_},
    all_mark_ranges(std::move(mark_ranges_)),
    part_index_in_query(part_index_in_query_),
    path(data_part->getFullRelativePath())
{
    /// Let's estimate the total number of rows for the progress bar.
    for (const auto & range : all_mark_ranges)
        total_marks_count += range.end - range.begin;

    size_t total_rows = data_part->index_granularity.getRowsCountInRanges(all_mark_ranges);

    if (!quiet)
        LOG_DEBUG(log, "Reading {} ranges in reverse order from part {}, approx. {} rows starting from {}",
            all_mark_ranges.size(), data_part->name, total_rows,
            data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));

    addTotalRowsApprox(total_rows);

    ordered_names = header_without_virtual_columns.getNames();

    task_columns = getReadTaskColumns(storage, metadata_snapshot, data_part, required_columns, prewhere_info, check_columns);

    /// will be used to distinguish between PREWHERE and WHERE columns when applying filter
    const auto & column_names = task_columns.columns.getNames();
    column_name_set = NameSet{column_names.begin(), column_names.end()};

    if (use_uncompressed_cache)
        owned_uncompressed_cache = storage.getContext()->getUncompressedCache();

    owned_mark_cache = storage.getContext()->getMarkCache();

    reader = data_part->getReader(task_columns.columns, metadata_snapshot,
        all_mark_ranges, owned_uncompressed_cache.get(),
        owned_mark_cache.get(), reader_settings);

    if (prewhere_info)
        pre_reader = data_part->getReader(task_columns.pre_columns, metadata_snapshot, all_mark_ranges,
            owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings);
}

bool MergeTreeReverseSelectProcessor::getNewTask()
try
{
    if ((chunks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
    if (chunks.empty() && all_mark_ranges.empty())
    {
        finish();
        return false;
@ -90,21 +22,15 @@ try
    if (all_mark_ranges.empty())
        return true;

    if (!reader)
        initializeReaders();

    /// Read ranges from right to left.
    MarkRanges mark_ranges_for_task = { all_mark_ranges.back() };
    all_mark_ranges.pop_back();

    std::unique_ptr<MergeTreeBlockSizePredictor> size_predictor;
    if (preferred_block_size_bytes)
    {
        const auto & required_column_names = task_columns.columns.getNames();
        const auto & required_pre_column_names = task_columns.pre_columns.getNames();
        NameSet complete_column_names(required_column_names.begin(), required_column_names.end());
        complete_column_names.insert(required_pre_column_names.begin(), required_pre_column_names.end());

        size_predictor = std::make_unique<MergeTreeBlockSizePredictor>(
            data_part, Names(complete_column_names.begin(), complete_column_names.end()), metadata_snapshot->getSampleBlock());
    }
    auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr
        : getSizePredictor(data_part, task_columns, sample_block);

    task = std::make_unique<MergeTreeReadTask>(
        data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set,
@ -150,17 +76,4 @@ Chunk MergeTreeReverseSelectProcessor::readFromPart()
    return res;
}

void MergeTreeReverseSelectProcessor::finish()
{
    /** Close the files (before destroying the object).
      * When many sources are created but only a few of them are read simultaneously,
      * the buffers don't waste memory.
      */
    reader.reset();
    pre_reader.reset();
    data_part.reset();
}

MergeTreeReverseSelectProcessor::~MergeTreeReverseSelectProcessor() = default;

}
Some files were not shown because too many files have changed in this diff.