Merge branch 'master' into dont-replicate-more-keepermap
commit a52fc83e14

.clang-tidy (14 changed lines)
@ -110,6 +110,7 @@ Checks: '*,
|
||||
-misc-const-correctness,
|
||||
-misc-no-recursion,
|
||||
-misc-non-private-member-variables-in-classes,
|
||||
-misc-confusable-identifiers, # useful but slooow
|
||||
|
||||
-modernize-avoid-c-arrays,
|
||||
-modernize-concat-nested-namespaces,
|
||||
@ -148,19 +149,6 @@ Checks: '*,
|
||||
-readability-use-anyofallof,
|
||||
|
||||
-zirkon-*,
|
||||
|
||||
-misc-*, # temporarily disabled due to being too slow
|
||||
# also disable checks in other categories which are aliases of checks in misc-*:
|
||||
# https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html
|
||||
-cert-dcl54-cpp, # alias of misc-new-delete-overloads
|
||||
-hicpp-new-delete-operators, # alias of misc-new-delete-overloads
|
||||
-cert-fio38-c, # alias of misc-non-copyable-objects
|
||||
-cert-dcl03-c, # alias of misc-static-assert
|
||||
-hicpp-static-assert, # alias of misc-static-assert
|
||||
-cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference
|
||||
-cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference
|
||||
-cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator
|
||||
-cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes
|
||||
'
|
||||
|
||||
WarningsAsErrors: '*'
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); }
|
||||
template <char ...chars> constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression)
|
||||
|
||||
#if defined(__SSE2__)
|
||||
template <char s0>
|
||||
|
@ -8,11 +8,18 @@ sidebar_label: Data Replication
|
||||
|
||||
:::note
|
||||
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
|
||||
|
||||
```sql
|
||||
ENGINE = ReplicatedReplacingMergeTree(
|
||||
'/clickhouse/tables/{shard}/table_name',
|
||||
'{replica}',
|
||||
ver
|
||||
)
|
||||
```
|
||||
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
|
||||
```
|
||||
|
||||
with:
|
||||
```
|
||||
|
||||
```sql
|
||||
ENGINE = ReplicatedReplacingMergeTree
|
||||
```
|
||||
:::
|
||||
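For illustration, a minimal sketch of the Cloud form described in this note (the column list is assumed; only the engine clause matters here):

```sql
-- Sketch: in ClickHouse Cloud the ZooKeeper path and replica arguments are omitted
CREATE TABLE table_name
(
    key UInt64,
    value String,
    ver UInt32
)
ENGINE = ReplicatedReplacingMergeTree
ORDER BY key;
```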
|
@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSO
|
||||
| ClickHouse type | BSON Type |
|
||||
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
|
||||
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
|
||||
| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
|
||||
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
|
||||
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
|
||||
@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSO
|
||||
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
|
||||
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
|
||||
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
|
||||
| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document |
|
||||
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
|
||||
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
|
||||
|
||||
For input it uses the following correspondence between BSON types and ClickHouse types:
|
||||
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
|
||||
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| BSON Type | ClickHouse Type |
|
||||
|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
|
||||
| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
|
||||
| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
|
||||
| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
|
||||
| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
|
||||
| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
|
||||
| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
|
||||
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) |
|
||||
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
|
||||
|
||||
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8).
|
||||
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value.
|
||||
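As a hedged illustration of the mappings above, the `BSONEachRow` format can be used to export rows as BSON documents (the column aliases are made up; per the table, `UInt64` maps to `\x12` int64 and `String` to `\x02` string):

```sql
SELECT number AS id, toString(number) AS name
FROM numbers(3)
FORMAT BSONEachRow;
```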
@ -1610,29 +1610,34 @@ See also [Format Schema](#formatschema).
|
||||
|
||||
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
|
||||
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|----------------------------------|------------------------------------------------------------------------------------------------------------------------|------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM` | [Enum(8\ |16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
||||
| CapnProto data type (`INSERT`) | ClickHouse data type | CapnProto data type (`SELECT`) |
|
||||
|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------|
|
||||
| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
|
||||
| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
|
||||
| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) | `UINT16` |
|
||||
| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
|
||||
| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
|
||||
| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md), [Decimal32](/docs/en/sql-reference/data-types/decimal.md) | `INT32` |
|
||||
| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
|
||||
| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md), [DateTime64](/docs/en/sql-reference/data-types/datetime.md), [Decimal64](/docs/en/sql-reference/data-types/decimal.md) | `INT64` |
|
||||
| `FLOAT32` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
|
||||
| `FLOAT64` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
|
||||
| `TEXT, DATA` | [String](/docs/en/sql-reference/data-types/string.md), [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `TEXT, DATA` |
|
||||
| `union(T, Void), union(Void, T)` | [Nullable(T)](/docs/en/sql-reference/data-types/date.md) | `union(T, Void), union(Void, T)` |
|
||||
| `ENUM` | [Enum(8/16)](/docs/en/sql-reference/data-types/enum.md) | `ENUM` |
|
||||
| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
|
||||
| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
|
||||
| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
|
||||
| `DATA` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `DATA` |
|
||||
| `DATA` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `DATA` |
|
||||
| `DATA` | [Decimal128/Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DATA` |
|
||||
| `STRUCT(entries LIST(STRUCT(key Key, value Value)))` | [Map](/docs/en/sql-reference/data-types/map.md) | `STRUCT(entries LIST(STRUCT(key Key, value Value)))` |
|
||||
|
||||
Integer types can be converted into each other during input/output.
|
||||
|
||||
For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting.
|
||||
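A hedged example of the setting mentioned above; the value `'by_names'` is assumed to be one of its documented modes:

```sql
-- Match CapnProto enum values to ClickHouse Enum values by name instead of by number
SET format_capn_proto_enum_comparising_mode = 'by_names';
```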
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested.
|
||||
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
|
||||
|
||||
### Inserting and Selecting Data {#inserting-and-selecting-data-capnproto}
|
||||
|
||||
|
@ -0,0 +1,76 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/quantileApprox
|
||||
sidebar_position: 204
|
||||
---
|
||||
|
||||
# quantileApprox
|
||||
|
||||
Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability.
|
||||
|
||||
`quantileApprox` is different from other quantile functions in ClickHouse because it enables the user to control the accuracy of the approximate quantile result.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
quantileApprox(accuracy, level)(expr)
|
||||
```
|
||||
|
||||
Alias: `medianApprox`.
|
||||
|
||||
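A minimal sketch of the alias, assuming it accepts the same `accuracy` parameter and fixes the level at 0.5:

```sql
SELECT medianApprox(100)(number + 1)
FROM numbers(1000);
```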
**Arguments**
|
||||
|
||||
- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy.
|
||||
|
||||
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
|
||||
|
||||
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
|
||||
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Quantile of the specified level and accuracy.
|
||||
|
||||
|
||||
Type:
|
||||
|
||||
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
|
||||
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
|
||||
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT quantileApprox(1, 0.25)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantileApprox(1, 0.25)(plus(number, 1))─┐
|
||||
│ 1 │
|
||||
└──────────────────────────────────────────┘
|
||||
|
||||
SELECT quantileApprox(10, 0.25)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantileApprox(10, 0.25)(plus(number, 1))─┐
|
||||
│ 156 │
|
||||
└───────────────────────────────────────────┘
|
||||
|
||||
SELECT quantileApprox(100, 0.25)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantileApprox(100, 0.25)(plus(number, 1))─┐
|
||||
│ 251 │
|
||||
└────────────────────────────────────────────┘
|
||||
|
||||
SELECT quantileApprox(1000, 0.25)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐
|
||||
│ 249 │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
**See Also**
|
||||
|
||||
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
|
||||
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)
|
@ -114,3 +114,59 @@ Result:
|
||||
│ [249.75,499.5,749.25,899.1,949.05,989.01,998.001] │
|
||||
└─────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## quantilesApprox
|
||||
|
||||
`quantilesApprox` works similarly to `quantileApprox` but allows calculating quantiles at several levels simultaneously and returns an array.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
quantilesApprox(accuracy, level1, level2, ...)(expr)
|
||||
```
|
||||
|
||||
**Returned value**
|
||||
|
||||
- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels.
|
||||
|
||||
Type of array values:
|
||||
|
||||
- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input.
|
||||
- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type.
|
||||
- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
|
||||
``` sql
|
||||
SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||
│ [1,1,1] │
|
||||
└──────────────────────────────────────────────────────┘
|
||||
|
||||
SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||
│ [156,413,659] │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
|
||||
|
||||
SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||
│ [251,498,741] │
|
||||
└────────────────────────────────────────────────────────┘
|
||||
|
||||
SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1)
|
||||
FROM numbers(1000)
|
||||
|
||||
┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐
|
||||
│ [249,499,749] │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string. “
|
||||
| %k | hour in 24h format (00-23) | 22 |
|
||||
| %l | hour in 12h format (01-12) | 09 |
|
||||
| %m | month as an integer number (01-12) | 01 |
|
||||
| %M | minute (00-59) | 33 |
|
||||
| %M | full month name (January-December) | January |
|
||||
| %n | new-line character (‘’) | |
|
||||
| %p | AM or PM designation | PM |
|
||||
| %Q | Quarter (1-4) | 1 |
|
||||
| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM |
|
||||
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
|
||||
| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM |
|
||||
| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
|
||||
| %s | second (00-59) | 44 |
|
||||
| %S | second (00-59) | 44 |
|
||||
| %t | horizontal-tab character (’) | |
|
||||
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
|
||||
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 |
|
||||
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
|
||||
| %V | ISO 8601 week number (01-53) | 01 |
|
||||
| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
|
||||
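To illustrate the updated specifiers above (the output shown in the comment is assumed): `%M` now produces the month name, while minutes appear via `%i` inside `%R` and `%T`:

```sql
SELECT formatDateTime(toDateTime('2023-01-02 22:33:44'), '%M %T');
-- assumed result: 'January 22:33:44'
```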
|
@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions.
|
||||
## replaceOne(haystack, pattern, replacement)
|
||||
|
||||
Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
|
||||
‘pattern’ and ‘replacement’ must be constants.
|
||||
|
||||
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
|
||||
|
||||
Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
|
||||
|
||||
Alias: `replace`.
|
||||
|
||||
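A small, hedged example of `replaceAll` (the result in the comment is what the description above implies):

```sql
SELECT replaceAll('Hello, World!', 'o', '0') AS res;
-- res = 'Hell0, W0rld!'
```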
## replaceRegexpOne(haystack, pattern, replacement)
|
||||
|
||||
Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string.
|
||||
‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
|
||||
‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
|
||||
‘pattern‘ must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
|
||||
‘replacement’ must be a plain string or a string containing substitutions `\0-\9`.
|
||||
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
|
||||
To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`.
|
||||
Also keep in mind that string literals require extra escaping.
|
||||
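A hedged sketch of the `\0`-`\9` substitutions described above; note the doubled backslashes required by string-literal escaping:

```sql
SELECT replaceRegexpOne('2023-04-05', '(\\d{4})-(\\d{2})-(\\d{2})', '\\3.\\2.\\1') AS res;
-- expected: '05.04.2023'
```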
@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
Alias: `REGEXP_REPLACE`.
|
||||
|
||||
## regexpQuoteMeta(s)
|
||||
|
||||
The function adds a backslash before some predefined characters in the string.
|
||||
|
@ -26,9 +26,11 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
template <typename> class QuantileTiming;
|
||||
template <typename> class QuantileApprox;
|
||||
|
||||
|
||||
/** Generic aggregate function for calculation of quantiles.
|
||||
@ -60,6 +62,7 @@ private:
|
||||
using ColVecType = ColumnVectorOrDecimal<Value>;
|
||||
|
||||
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
|
||||
static constexpr bool is_quantile_approx = std::is_same_v<Data, QuantileApprox<Value>>;
|
||||
static_assert(!is_decimal<Value> || !returns_float);
|
||||
|
||||
QuantileLevels<Float64> levels;
|
||||
@ -67,22 +70,57 @@ private:
|
||||
/// Used when there is a single level to get.
|
||||
Float64 level = 0.5;
|
||||
|
||||
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
|
||||
ssize_t accuracy = 10000;
|
||||
|
||||
DataTypePtr & argument_type;
|
||||
|
||||
public:
|
||||
AggregateFunctionQuantile(const DataTypes & argument_types_, const Array & params)
|
||||
: IAggregateFunctionDataHelper<Data, AggregateFunctionQuantile<Value, Data, Name, has_second_arg, FloatReturnType, returns_many>>(
|
||||
argument_types_, params, createResultType(argument_types_))
|
||||
, levels(params, returns_many)
|
||||
, levels(is_quantile_approx && !params.empty() ? Array(params.begin() + 1, params.end()) : params, returns_many)
|
||||
, level(levels.levels[0])
|
||||
, argument_type(this->argument_types[0])
|
||||
{
|
||||
if (!returns_many && levels.size() > 1)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter or less", getName());
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
|
||||
|
||||
if constexpr (is_quantile_approx)
|
||||
{
|
||||
if (params.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());
|
||||
|
||||
const auto & accuracy_field = params[0];
|
||||
if (!isInt64OrUInt64FieldType(accuracy_field.getType()))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires accuracy parameter with integer type", getName());
|
||||
|
||||
if (accuracy_field.getType() == Field::Types::Int64)
|
||||
accuracy = accuracy_field.get<Int64>();
|
||||
else
|
||||
accuracy = accuracy_field.get<UInt64>();
|
||||
|
||||
if (accuracy <= 0)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Aggregate function {} requires accuracy parameter with positive value but is {}",
|
||||
getName(),
|
||||
accuracy);
|
||||
}
|
||||
}
|
||||
|
||||
String getName() const override { return Name::name; }
|
||||
|
||||
void create(AggregateDataPtr __restrict place) const override /// NOLINT
|
||||
{
|
||||
if constexpr (is_quantile_approx)
|
||||
new (place) Data(accuracy);
|
||||
else
|
||||
new (place) Data;
|
||||
}
|
||||
|
||||
static DataTypePtr createResultType(const DataTypes & argument_types_)
|
||||
{
|
||||
DataTypePtr res;
|
||||
@ -257,4 +295,7 @@ struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16";
|
||||
struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
|
||||
struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };
|
||||
|
||||
struct NameQuantileApprox { static constexpr auto name = "quantileApprox"; };
|
||||
struct NameQuantilesApprox { static constexpr auto name = "quantilesApprox"; };
|
||||
|
||||
}
|
||||
|
src/AggregateFunctions/AggregateFunctionQuantileApprox.cpp (new file, 71 lines)
@ -0,0 +1,71 @@
|
||||
#include <AggregateFunctions/AggregateFunctionQuantile.h>
|
||||
#include <AggregateFunctions/QuantileApprox.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <Core/Field.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename Value, bool _> using FuncQuantileApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantileApprox, false, void, false>;
|
||||
template <typename Value, bool _> using FuncQuantilesApprox = AggregateFunctionQuantile<Value, QuantileApprox<Value>, NameQuantilesApprox, false, void, true>;
|
||||
|
||||
template <template <typename, bool> class Function>
|
||||
AggregateFunctionPtr createAggregateFunctionQuantile(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
/// Second argument type check doesn't depend on the type of the first one.
|
||||
Function<void, true>::assertSecondArg(argument_types);
|
||||
|
||||
const DataTypePtr & argument_type = argument_types[0];
|
||||
WhichDataType which(argument_type);
|
||||
|
||||
#define DISPATCH(TYPE) \
|
||||
if (which.idx == TypeIndex::TYPE) \
|
||||
return std::make_shared<Function<TYPE, true>>(argument_types, params);
|
||||
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
|
||||
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Decimal32) return std::make_shared<Function<Decimal32, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal64) return std::make_shared<Function<Decimal64, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal128) return std::make_shared<Function<Decimal128, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Decimal256) return std::make_shared<Function<Decimal256, false>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::DateTime64) return std::make_shared<Function<DateTime64, false>>(argument_types, params);
|
||||
|
||||
if (which.idx == TypeIndex::Int128) return std::make_shared<Function<Int128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt128) return std::make_shared<Function<UInt128, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::Int256) return std::make_shared<Function<Int256, true>>(argument_types, params);
|
||||
if (which.idx == TypeIndex::UInt256) return std::make_shared<Function<UInt256, true>>(argument_types, params);
|
||||
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
|
||||
argument_type->getName(), name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory & factory)
|
||||
{
|
||||
/// For aggregate functions returning array we cannot return NULL on empty set.
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
|
||||
|
||||
factory.registerFunction(NameQuantileApprox::name, createAggregateFunctionQuantile<FuncQuantileApprox>);
|
||||
factory.registerFunction(NameQuantilesApprox::name, {createAggregateFunctionQuantile<FuncQuantilesApprox>, properties});
|
||||
|
||||
/// 'median' is an alias for 'quantile'
|
||||
factory.registerAlias("medianApprox", NameQuantileApprox::name);
|
||||
}
|
||||
|
||||
}
|
src/AggregateFunctions/QuantileApprox.h (new file, 477 lines)
@ -0,0 +1,477 @@
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <base/sort.h>
|
||||
#include <Common/RadixSort.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class ApproxSampler
|
||||
{
|
||||
public:
|
||||
struct Stats
|
||||
{
|
||||
T value; // the sampled value
|
||||
Int64 g; // the minimum rank jump from the previous value's minimum rank
|
||||
Int64 delta; // the maximum span of the rank
|
||||
|
||||
Stats() = default;
|
||||
Stats(T value_, Int64 g_, Int64 delta_) : value(value_), g(g_), delta(delta_) {}
|
||||
};
|
||||
|
||||
struct QueryResult
|
||||
{
|
||||
size_t index;
|
||||
Int64 rank;
|
||||
T value;
|
||||
|
||||
QueryResult(size_t index_, Int64 rank_, T value_) : index(index_), rank(rank_), value(value_) { }
|
||||
};
|
||||
|
||||
ApproxSampler() = default;
|
||||
|
||||
explicit ApproxSampler(
|
||||
double relative_error_,
|
||||
size_t compress_threshold_ = default_compress_threshold,
|
||||
size_t count_ = 0,
|
||||
bool compressed_ = false)
|
||||
: relative_error(relative_error_)
|
||||
, compress_threshold(compress_threshold_)
|
||||
, count(count_)
|
||||
, compressed(compressed_)
|
||||
{
|
||||
sampled.reserve(compress_threshold);
|
||||
backup_sampled.reserve(compress_threshold);
|
||||
|
||||
head_sampled.reserve(default_head_size);
|
||||
}
|
||||
|
||||
bool isCompressed() const { return compressed; }
|
||||
void setCompressed() { compressed = true; }
|
||||
|
||||
void insert(T x)
|
||||
{
|
||||
head_sampled.push_back(x);
|
||||
compressed = false;
|
||||
if (head_sampled.size() >= default_head_size)
|
||||
{
|
||||
withHeadBufferInserted();
|
||||
if (sampled.size() >= compress_threshold)
|
||||
compress();
|
||||
}
|
||||
}
|
||||
|
||||
void query(const Float64 * percentiles, const size_t * indices, size_t size, T * result) const
|
||||
{
|
||||
if (!head_sampled.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot operate on an uncompressed summary, call compress() first");
|
||||
|
||||
if (sampled.empty())
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
result[i] = T();
|
||||
return;
|
||||
}
|
||||
|
||||
Int64 current_max = std::numeric_limits<Int64>::min();
|
||||
for (const auto & stats : sampled)
|
||||
current_max = std::max(stats.delta + stats.g, current_max);
|
||||
Int64 target_error = current_max/2;
|
||||
|
||||
size_t index = 0;
|
||||
auto min_rank = sampled[0].g;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
double percentile = percentiles[indices[i]];
|
||||
if (percentile <= relative_error)
|
||||
{
|
||||
result[indices[i]] = sampled.front().value;
|
||||
}
|
||||
else if (percentile >= 1 - relative_error)
|
||||
{
|
||||
result[indices[i]] = sampled.back().value;
|
||||
}
|
||||
else
|
||||
{
|
||||
QueryResult res = findApproxQuantile(index, min_rank, target_error, percentile);
|
||||
index = res.index;
|
||||
min_rank = res.rank;
|
||||
result[indices[i]] = res.value;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void compress()
|
||||
{
|
||||
if (compressed)
|
||||
return;
|
||||
|
||||
withHeadBufferInserted();
|
||||
|
||||
doCompress(2 * relative_error * count);
|
||||
compressed = true;
|
||||
}
|
||||
|
||||
|
||||
void merge(const ApproxSampler & other)
|
||||
{
|
||||
if (other.count == 0)
|
||||
return;
|
||||
else if (count == 0)
|
||||
{
|
||||
compress_threshold = other.compress_threshold;
|
||||
relative_error = other.relative_error;
|
||||
count = other.count;
|
||||
compressed = other.compressed;
|
||||
|
||||
sampled.resize(other.sampled.size());
|
||||
memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size());
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Merge the two buffers.
|
||||
// The GK algorithm is a bit unclear about it, but we need to adjust the statistics during the
|
||||
// merging. The main idea is that samples that come from one side will suffer from the lack of
|
||||
// precision of the other.
|
||||
// As a concrete example, take two QuantileSummaries whose samples (value, g, delta) are:
|
||||
// `a = [(0, 1, 0), (20, 99, 0)]` and `b = [(10, 1, 0), (30, 49, 0)]`
|
||||
// This means `a` has 100 values, whose minimum is 0 and maximum is 20,
|
||||
// while `b` has 50 values, between 10 and 30.
|
||||
// The resulting samples of the merge will be:
|
||||
// a+b = [(0, 1, 0), (10, 1, ??), (20, 99, ??), (30, 49, 0)]
|
||||
// The values of `g` do not change, as they represent the minimum number of values between two
|
||||
// consecutive samples. The values of `delta` should be adjusted, however.
|
||||
// Take the case of the sample `10` from `b`. In the original stream, it could have appeared
|
||||
// right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`.
|
||||
// In the GK algorithm's style of working in terms of maximum bounds, one can observe that the
|
||||
// maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) =
|
||||
// floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is
|
||||
// `floor(2 * eps_b * n_b)`.
|
||||
// Only samples that interleave the other side are affected. That means that samples from
|
||||
// one side that are lesser (or greater) than all samples from the other side are just copied
|
||||
// unmodified.
|
||||
// If the merging instances have different `relativeError`, the resulting instance will carry
|
||||
// the largest one: `eps_ab = max(eps_a, eps_b)`.
|
||||
// The main invariant of the GK algorithm is kept:
|
||||
// `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
|
||||
// `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
|
||||
// Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])`
|
||||
compress();
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + other.sampled.size());
|
||||
double merged_relative_error = std::max(relative_error, other.relative_error);
|
||||
size_t merged_count = count + other.count;
|
||||
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
|
||||
Int64 additional_other_delta = static_cast<Int64>(std::floor(2 * relative_error * count));
|
||||
|
||||
// Do a merge of two sorted lists until one of the lists is fully consumed
|
||||
size_t self_idx = 0;
|
||||
size_t other_idx = 0;
|
||||
while (self_idx < sampled.size() && other_idx < other.sampled.size())
|
||||
{
|
||||
const Stats & self_sample = sampled[self_idx];
|
||||
const Stats & other_sample = other.sampled[other_idx];
|
||||
|
||||
// Detect next sample
|
||||
Stats next_sample;
|
||||
Int64 additional_delta = 0;
|
||||
if (self_sample.value < other_sample.value)
|
||||
{
|
||||
++self_idx;
|
||||
next_sample = self_sample;
|
||||
additional_delta = other_idx > 0 ? additional_self_delta : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
++other_idx;
|
||||
next_sample = other_sample;
|
||||
additional_delta = self_idx > 0 ? additional_other_delta : 0;
|
||||
}
|
||||
|
||||
// Insert it
|
||||
next_sample.delta += additional_delta;
|
||||
backup_sampled.emplace_back(std::move(next_sample));
|
||||
}
|
||||
|
||||
// Copy the remaining samples from the other list
|
||||
// (by construction, at most one `while` loop will run)
|
||||
while (self_idx < sampled.size())
|
||||
{
|
||||
backup_sampled.emplace_back(sampled[self_idx]);
|
||||
++self_idx;
|
||||
}
|
||||
while (other_idx < other.sampled.size())
|
||||
{
|
||||
backup_sampled.emplace_back(other.sampled[other_idx]);
|
||||
++other_idx;
|
||||
}
|
||||
|
||||
std::swap(sampled, backup_sampled);
|
||||
relative_error = merged_relative_error;
|
||||
count = merged_count;
|
||||
compress_threshold = other.compress_threshold;
|
||||
|
||||
doCompress(2 * merged_relative_error * merged_count);
|
||||
compressed = true;
|
||||
}
|
||||
}
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeIntBinary<size_t>(compress_threshold, buf);
|
||||
writeFloatBinary<double>(relative_error, buf);
|
||||
writeIntBinary<size_t>(count, buf);
|
||||
writeIntBinary<size_t>(sampled.size(), buf);
|
||||
|
||||
for (const auto & stats : sampled)
|
||||
{
|
||||
writeFloatBinary<T>(stats.value, buf);
|
||||
writeIntBinary<Int64>(stats.g, buf);
|
||||
writeIntBinary<Int64>(stats.delta, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readIntBinary<size_t>(compress_threshold, buf);
|
||||
readFloatBinary<double>(relative_error, buf);
|
||||
readIntBinary<size_t>(count, buf);
|
||||
|
||||
size_t sampled_len = 0;
|
||||
readIntBinary<size_t>(sampled_len, buf);
|
||||
sampled.resize(sampled_len);
|
||||
|
||||
for (size_t i = 0; i < sampled_len; ++i)
|
||||
{
|
||||
auto stats = sampled[i];
|
||||
readFloatBinary<T>(stats.value, buf);
|
||||
readIntBinary<Int64>(stats.g, buf);
|
||||
readIntBinary<Int64>(stats.delta, buf);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
QueryResult findApproxQuantile(size_t index, Int64 min_rank_at_index, double target_error, double percentile) const
|
||||
{
|
||||
Stats curr_sample = sampled[index];
|
||||
Int64 rank = static_cast<Int64>(std::ceil(percentile * count));
|
||||
size_t i = index;
|
||||
Int64 min_rank = min_rank_at_index;
|
||||
while (i < sampled.size() - 1)
|
||||
{
|
||||
Int64 max_rank = min_rank + curr_sample.delta;
|
||||
if (max_rank - target_error <= rank && rank <= min_rank + target_error)
|
||||
return {i, min_rank, curr_sample.value};
|
||||
else
|
||||
{
|
||||
++i;
|
||||
curr_sample = sampled[i];
|
||||
min_rank += curr_sample.g;
|
||||
}
|
||||
}
|
||||
return {sampled.size()-1, 0, sampled.back().value};
|
||||
}
|
||||
|
||||
void withHeadBufferInserted()
|
||||
{
|
||||
if (head_sampled.empty())
|
||||
return;
|
||||
|
||||
bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
|
||||
if (use_radix_sort)
|
||||
RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
|
||||
else
|
||||
::sort(head_sampled.begin(), head_sampled.end());
|
||||
|
||||
backup_sampled.clear();
|
||||
backup_sampled.reserve(sampled.size() + head_sampled.size());
|
||||
|
||||
size_t sample_idx = 0;
|
||||
size_t ops_idx = 0;
|
||||
size_t current_count = count;
|
||||
for (; ops_idx < head_sampled.size(); ++ops_idx)
|
||||
{
|
||||
T current_sample = head_sampled[ops_idx];
|
||||
|
||||
// Add all the samples before the next observation.
|
||||
while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
|
||||
{
|
||||
backup_sampled.emplace_back(sampled[sample_idx]);
|
||||
++sample_idx;
|
||||
}
|
||||
|
||||
// If it is the first one to insert, or if it is the last one
|
||||
++current_count;
|
||||
Int64 delta;
|
||||
if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
|
||||
delta = 0;
|
||||
else
|
||||
delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));
|
||||
|
||||
backup_sampled.emplace_back(current_sample, 1, delta);
|
||||
}
|
||||
|
||||
// Add all the remaining existing samples
|
||||
for (; sample_idx < sampled.size(); ++sample_idx)
|
||||
backup_sampled.emplace_back(sampled[sample_idx]);
|
||||
|
||||
std::swap(sampled, backup_sampled);
|
||||
head_sampled.clear();
|
||||
count = current_count;
|
||||
}
|
||||
|
||||
|
||||
void doCompress(double merge_threshold)
|
||||
{
|
||||
if (sampled.empty())
|
||||
return;
|
||||
|
||||
backup_sampled.clear();
|
||||
// Start from the last element, which is always part of the set.
|
||||
// The head contains the current new head, that may be merged with the current element.
|
||||
Stats head = sampled.back();
|
||||
ssize_t i = sampled.size() - 2;
|
||||
|
||||
// Do not compress the last element
|
||||
while (i >= 1)
|
||||
{
|
||||
// The current sample:
|
||||
const auto & sample1 = sampled[i];
|
||||
// Do we need to compress?
|
||||
if (sample1.g + head.g + head.delta < merge_threshold)
|
||||
{
|
||||
// Do not insert yet, just merge the current element into the head.
|
||||
head.g += sample1.g;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Prepend the current head, and keep the current sample as target for merging.
|
||||
backup_sampled.push_back(head);
|
||||
head = sample1;
|
||||
}
|
||||
--i;
|
||||
}
|
||||
|
||||
backup_sampled.push_back(head);
|
||||
// If necessary, add the minimum element:
|
||||
auto curr_head = sampled.front();
|
||||
|
||||
// don't add the minimum element if `currentSamples` has only one element (both `currHead` and
|
||||
// `head` point to the same element)
|
||||
if (curr_head.value <= head.value && sampled.size() > 1)
|
||||
backup_sampled.emplace_back(sampled.front());
|
||||
|
||||
std::reverse(backup_sampled.begin(), backup_sampled.end());
|
||||
std::swap(sampled, backup_sampled);
|
||||
}
|
||||
|
||||
double relative_error;
|
||||
size_t compress_threshold;
|
||||
size_t count = 0;
|
||||
bool compressed;
|
||||
|
||||
PaddedPODArray<Stats> sampled;
|
||||
PaddedPODArray<Stats> backup_sampled;
|
||||
|
||||
PaddedPODArray<T> head_sampled;
|
||||
|
||||
static constexpr size_t default_compress_threshold = 10000;
|
||||
static constexpr size_t default_head_size = 50000;
|
||||
};
|
||||
|
||||
template <typename Value>
|
||||
class QuantileApprox
|
||||
{
|
||||
private:
|
||||
using Data = ApproxSampler<Value>;
|
||||
mutable Data data;
|
||||
|
||||
public:
|
||||
QuantileApprox() = default;
|
||||
|
||||
explicit QuantileApprox(size_t accuracy) : data(1.0 / static_cast<double>(accuracy)) { }
|
||||
|
||||
void add(const Value & x)
|
||||
{
|
||||
data.insert(x);
|
||||
}
|
||||
|
||||
template <typename Weight>
|
||||
void add(const Value &, const Weight &)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method add with weight is not implemented for GKSampler");
|
||||
}
|
||||
|
||||
void merge(const QuantileApprox & rhs)
|
||||
{
|
||||
if (!data.isCompressed())
|
||||
data.compress();
|
||||
|
||||
data.merge(rhs.data);
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
/// Always compress before serialization
|
||||
if (!data.isCompressed())
|
||||
data.compress();
|
||||
|
||||
data.write(buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
data.read(buf);
|
||||
|
||||
data.setCompressed();
|
||||
}
|
||||
|
||||
/// Get the value of the `level` quantile. The level must be between 0 and 1.
|
||||
Value get(Float64 level)
|
||||
{
|
||||
if (!data.isCompressed())
|
||||
data.compress();
|
||||
|
||||
Value res;
|
||||
size_t indice = 0;
|
||||
data.query(&level, &indice, 1, &res);
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
|
||||
/// indices - an array of index levels such that the corresponding elements will go in ascending order.
|
||||
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
|
||||
{
|
||||
if (!data.isCompressed())
|
||||
data.compress();
|
||||
|
||||
data.query(levels, indices, size, result);
|
||||
}
|
||||
|
||||
Float64 getFloat64(Float64 /*level*/)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFloat64 is not implemented for GKSampler");
|
||||
}
|
||||
|
||||
void getManyFloat(const Float64 * /*levels*/, const size_t * /*indices*/, size_t /*size*/, Float64 * /*result*/)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getManyFloat is not implemented for GKSampler");
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -32,6 +32,7 @@ void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionRate(AggregateFunctionFactory &);
|
||||
@ -124,6 +125,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionsQuantileTDigestWeighted(factory);
|
||||
registerAggregateFunctionsQuantileBFloat16(factory);
|
||||
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
|
||||
registerAggregateFunctionsQuantileApprox(factory);
|
||||
registerAggregateFunctionsSequenceMatch(factory);
|
||||
registerAggregateFunctionWindowFunnel(factory);
|
||||
registerAggregateFunctionRate(factory);
|
||||
|
@ -771,16 +771,19 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
|
||||
String existing_backup_uuid = existing_backup_path;
|
||||
existing_backup_uuid.erase(0, String("backup-").size());
|
||||
|
||||
|
||||
if (existing_backup_uuid == toString(backup_uuid))
|
||||
continue;
|
||||
|
||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage");
|
||||
if (status != Stage::COMPLETED)
|
||||
String status;
|
||||
if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status))
|
||||
{
|
||||
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
|
||||
result = true;
|
||||
return;
|
||||
/// If status is not COMPLETED it could be because the backup failed, check if 'error' exists
|
||||
if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_backup_path + "/error"))
|
||||
{
|
||||
LOG_WARNING(log, "Found a concurrent backup: {}, current backup: {}", existing_backup_uuid, toString(backup_uuid));
|
||||
result = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,10 +8,11 @@ namespace DB
|
||||
BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_,
|
||||
const std::optional<UInt128> & checksum_,
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
|
||||
: BackupEntryFromImmutableFile(disk_, file_path_, file_size_, checksum_, temporary_file_)
|
||||
: BackupEntryFromImmutableFile(disk_, file_path_, settings_, file_size_, checksum_, temporary_file_)
|
||||
, limit(BackupEntryFromImmutableFile::getSize())
|
||||
{
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ public:
|
||||
BackupEntryFromAppendOnlyFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_ = {},
|
||||
const std::optional<UInt128> & checksum_ = {},
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
|
||||
|
@ -11,10 +11,16 @@ namespace DB
|
||||
BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_,
|
||||
const std::optional<UInt128> & checksum_,
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
|
||||
: disk(disk_), file_path(file_path_), file_size(file_size_), checksum(checksum_), temporary_file_on_disk(temporary_file_)
|
||||
: disk(disk_)
|
||||
, file_path(file_path_)
|
||||
, settings(settings_)
|
||||
, file_size(file_size_)
|
||||
, checksum(checksum_)
|
||||
, temporary_file_on_disk(temporary_file_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -30,7 +36,7 @@ UInt64 BackupEntryFromImmutableFile::getSize() const
|
||||
|
||||
std::unique_ptr<SeekableReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
|
||||
{
|
||||
return disk->readFile(file_path);
|
||||
return disk->readFile(file_path, settings);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Backups/IBackupEntry.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <base/defines.h>
|
||||
#include <mutex>
|
||||
|
||||
@ -19,6 +20,7 @@ public:
|
||||
BackupEntryFromImmutableFile(
|
||||
const DiskPtr & disk_,
|
||||
const String & file_path_,
|
||||
const ReadSettings & settings_,
|
||||
const std::optional<UInt64> & file_size_ = {},
|
||||
const std::optional<UInt128> & checksum_ = {},
|
||||
const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
|
||||
@ -37,6 +39,7 @@ public:
|
||||
private:
|
||||
const DiskPtr disk;
|
||||
const String file_path;
|
||||
ReadSettings settings;
|
||||
mutable std::optional<UInt64> file_size TSA_GUARDED_BY(get_file_size_mutex);
|
||||
mutable std::mutex get_file_size_mutex;
|
||||
const std::optional<UInt128> checksum;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -22,6 +23,11 @@ void IBackupReader::copyFileToDisk(const String & file_name, size_t size, DiskPt
|
||||
write_buffer->finalize();
|
||||
}
|
||||
|
||||
IBackupWriter::IBackupWriter(const ContextPtr & context_)
|
||||
: read_settings(context_->getBackupReadSettings())
|
||||
, has_throttling(static_cast<bool>(context_->getBackupsThrottler()))
|
||||
{}
|
||||
|
||||
void IBackupWriter::copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
|
||||
{
|
||||
auto read_buffer = create_read_buffer();
|
||||
|
@ -3,6 +3,8 @@
|
||||
#include <Core/Types.h>
|
||||
#include <Disks/DiskType.h>
|
||||
#include <Disks/IDisk.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -28,6 +30,8 @@ class IBackupWriter /// BackupWriterFile, BackupWriterDisk
|
||||
public:
|
||||
using CreateReadBufferFunction = std::function<std::unique_ptr<SeekableReadBuffer>()>;
|
||||
|
||||
explicit IBackupWriter(const ContextPtr & context_);
|
||||
|
||||
virtual ~IBackupWriter() = default;
|
||||
virtual bool fileExists(const String & file_name) = 0;
|
||||
virtual UInt64 getFileSize(const String & file_name) = 0;
|
||||
@ -38,7 +42,17 @@ public:
|
||||
virtual DataSourceDescription getDataSourceDescription() const = 0;
|
||||
virtual void copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name);
|
||||
virtual bool supportNativeCopy(DataSourceDescription /* data_source_description */) const { return false; }
|
||||
|
||||
/// Copy file using native copy (optimized for S3 to use CopyObject)
|
||||
///
|
||||
/// NOTE: It still may fall back to copyDataToFile() if native copy is not possible:
|
||||
/// - different buckets
|
||||
/// - throttling had been requested
|
||||
virtual void copyFileNative(DiskPtr src_disk, const String & src_file_name, UInt64 src_offset, UInt64 src_size, const String & dest_file_name);
|
||||
|
||||
protected:
|
||||
const ReadSettings read_settings;
|
||||
const bool has_throttling;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -50,7 +50,10 @@ void BackupReaderDisk::copyFileToDisk(const String & file_name, size_t size, Dis
|
||||
}
|
||||
|
||||
|
||||
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_) : disk(disk_), path(path_)
|
||||
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_)
|
||||
: IBackupWriter(context_)
|
||||
, disk(disk_)
|
||||
, path(path_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -127,9 +130,9 @@ void BackupWriterDisk::copyFileNative(DiskPtr src_disk, const String & src_file_
|
||||
if (!src_disk)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot natively copy data to disk without source disk");
|
||||
|
||||
if ((src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
|
||||
if (has_throttling || (src_offset != 0) || (src_size != src_disk->getFileSize(src_file_name)))
|
||||
{
|
||||
auto create_read_buffer = [src_disk, src_file_name] { return src_disk->readFile(src_file_name); };
|
||||
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
|
||||
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
|
||||
return;
|
||||
}
|
||||
|
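Annotation (not part of the patch): the hunks above gate the native-copy fast path on throttling and on whether the whole file is requested; anything else is streamed through a read buffer. A minimal standalone sketch of that decision, with hypothetical names:

```cpp
// Sketch of the fallback rule applied in copyFileNative() above: native copy
// (filesystem copy, S3 CopyObject) is only attempted for a full, unthrottled
// file copy; otherwise the data goes through a read buffer so that offsets and
// the backup throttler can be honoured. Names here are illustrative.
#include <cstdint>
#include <iostream>

struct NativeCopyRequest
{
    bool has_throttling = false;   // a backup throttler is configured on the server
    uint64_t src_offset = 0;       // requested offset inside the source file
    uint64_t src_size = 0;         // requested number of bytes
    uint64_t src_file_size = 0;    // total size of the source file
};

bool canUseNativeCopy(const NativeCopyRequest & req)
{
    return !req.has_throttling && req.src_offset == 0 && req.src_size == req.src_file_size;
}

int main()
{
    std::cout << canUseNativeCopy({false, 0, 100, 100}) << '\n'; // 1: native copy
    std::cout << canUseNativeCopy({true, 0, 100, 100}) << '\n';  // 0: fall back to buffered copy
}
```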
@ -2,6 +2,7 @@
|
||||
|
||||
#include <filesystem>
|
||||
#include <Backups/BackupIO.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -30,7 +31,7 @@ private:
|
||||
class BackupWriterDisk : public IBackupWriter
|
||||
{
|
||||
public:
|
||||
BackupWriterDisk(const DiskPtr & disk_, const String & path_);
|
||||
BackupWriterDisk(const DiskPtr & disk_, const String & path_, const ContextPtr & context_);
|
||||
~BackupWriterDisk() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
|
@ -49,7 +49,9 @@ void BackupReaderFile::copyFileToDisk(const String & file_name, size_t size, Dis
|
||||
}
|
||||
|
||||
|
||||
BackupWriterFile::BackupWriterFile(const String & path_) : path(path_)
|
||||
BackupWriterFile::BackupWriterFile(const String & path_, const ContextPtr & context_)
|
||||
: IBackupWriter(context_)
|
||||
, path(path_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -152,9 +154,9 @@ void BackupWriterFile::copyFileNative(DiskPtr src_disk, const String & src_file_
|
||||
else
|
||||
abs_source_path = fs::absolute(src_file_name);
|
||||
|
||||
if ((src_offset != 0) || (src_size != fs::file_size(abs_source_path)))
|
||||
if (has_throttling || (src_offset != 0) || (src_size != fs::file_size(abs_source_path)))
|
||||
{
|
||||
auto create_read_buffer = [abs_source_path] { return createReadBufferFromFileBase(abs_source_path, {}); };
|
||||
auto create_read_buffer = [this, abs_source_path] { return createReadBufferFromFileBase(abs_source_path, read_settings); };
|
||||
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
|
||||
return;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <filesystem>
|
||||
#include <Backups/BackupIO.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -27,7 +28,7 @@ private:
|
||||
class BackupWriterFile : public IBackupWriter
|
||||
{
|
||||
public:
|
||||
explicit BackupWriterFile(const String & path_);
|
||||
explicit BackupWriterFile(const String & path_, const ContextPtr & context_);
|
||||
~BackupWriterFile() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
|
@ -161,9 +161,9 @@ void BackupReaderS3::copyFileToDisk(const String & file_name, size_t size, DiskP
|
||||
|
||||
BackupWriterS3::BackupWriterS3(
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
|
||||
: s3_uri(s3_uri_)
|
||||
: IBackupWriter(context_)
|
||||
, s3_uri(s3_uri_)
|
||||
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
|
||||
, read_settings(context_->getReadSettings())
|
||||
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
|
||||
, log(&Poco::Logger::get("BackupWriterS3"))
|
||||
{
|
||||
@ -189,7 +189,7 @@ void BackupWriterS3::copyFileNative(DiskPtr src_disk, const String & src_file_na
|
||||
auto objects = src_disk->getStorageObjects(src_file_name);
|
||||
if (objects.size() > 1)
|
||||
{
|
||||
auto create_read_buffer = [src_disk, src_file_name] { return src_disk->readFile(src_file_name); };
|
||||
auto create_read_buffer = [this, src_disk, src_file_name] { return src_disk->readFile(src_file_name, read_settings); };
|
||||
copyDataToFile(create_read_buffer, src_offset, src_size, dest_file_name);
|
||||
}
|
||||
else
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/S3Common.h>
|
||||
#include <Storages/StorageS3Settings.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -76,7 +77,6 @@ private:
|
||||
|
||||
S3::URI s3_uri;
|
||||
std::shared_ptr<S3::Client> client;
|
||||
ReadSettings read_settings;
|
||||
S3Settings::RequestSettings request_settings;
|
||||
Poco::Logger * log;
|
||||
std::optional<bool> supports_batch_delete;
|
||||
|
@ -279,12 +279,16 @@ bool RestoreCoordinationRemote::hasConcurrentRestores(const std::atomic<size_t>
|
||||
if (existing_restore_uuid == toString(restore_uuid))
|
||||
continue;
|
||||
|
||||
const auto status = zk->get(root_zookeeper_path + "/" + existing_restore_path + "/stage");
|
||||
if (status != Stage::COMPLETED)
|
||||
String status;
|
||||
if (zk->tryGet(root_zookeeper_path + "/" + existing_restore_path + "/stage", status))
|
||||
{
|
||||
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
|
||||
result = true;
|
||||
return;
|
||||
/// If status is not COMPLETED, it could be because the restore failed; check if 'error' exists
|
||||
if (status != Stage::COMPLETED && !zk->exists(root_zookeeper_path + "/" + existing_restore_path + "/error"))
|
||||
{
|
||||
LOG_WARNING(log, "Found a concurrent restore: {}, current restore: {}", existing_restore_uuid, toString(restore_uuid));
|
||||
result = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
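Annotation (not part of the patch): the rewritten check above stops treating a failed restore as a blocker. A minimal standalone sketch of the resulting rule, with a hypothetical helper and placeholder stage strings:

```cpp
// Sketch of the concurrency rule above: another restore counts as concurrent only if
// its stage node still exists, the stage is not COMPLETED, and no 'error' node was
// written for it. Stage names here are placeholders, not the real constants.
#include <iostream>
#include <optional>
#include <string>

bool isConcurrentRestore(const std::optional<std::string> & stage, bool has_error_node)
{
    if (!stage)
        return false;              // the stage node may already have been removed
    if (*stage == "COMPLETED")
        return false;              // the other restore finished successfully
    return !has_error_node;        // still in progress only if it has not failed
}

int main()
{
    std::cout << isConcurrentRestore(std::nullopt, false) << '\n';               // 0
    std::cout << isConcurrentRestore(std::string("COMPLETED"), false) << '\n';   // 0
    std::cout << isConcurrentRestore(std::string("RUNNING"), true) << '\n';      // 0: it failed
    std::cout << isConcurrentRestore(std::string("RUNNING"), false) << '\n';     // 1: truly concurrent
}
```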
@ -178,9 +178,9 @@ void registerBackupEnginesFileAndDisk(BackupFactory & factory)
|
||||
{
|
||||
std::shared_ptr<IBackupWriter> writer;
|
||||
if (engine_name == "File")
|
||||
writer = std::make_shared<BackupWriterFile>(path);
|
||||
writer = std::make_shared<BackupWriterFile>(path, params.context);
|
||||
else
|
||||
writer = std::make_shared<BackupWriterDisk>(disk, path);
|
||||
writer = std::make_shared<BackupWriterDisk>(disk, path, params.context);
|
||||
return std::make_unique<BackupImpl>(
|
||||
backup_name_for_logging,
|
||||
archive_params,
|
||||
|
@ -5,6 +5,10 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template<typename T, typename ... U>
|
||||
concept is_any_of = (std::same_as<T, U> || ...);
|
||||
|
||||
|
||||
template <typename... T>
|
||||
concept OptionalArgument = requires(T &&...)
|
||||
{
|
||||
|
@ -82,6 +82,26 @@ inline std::string_view toDescription(OvercommitResult result)
|
||||
}
|
||||
}
|
||||
|
||||
inline void debugLogBigAllocationWithoutCheck(Int64 size [[maybe_unused]])
|
||||
{
|
||||
/// Big allocations through allocNoThrow (without checking memory limits) may easily lead to OOM (and it's hard to debug).
|
||||
/// Let's find them.
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
if (size < 0)
|
||||
return;
|
||||
|
||||
constexpr Int64 threshold = 16 * 1024 * 1024; /// The choice is arbitrary (maybe we should decrease it)
|
||||
if (size < threshold)
|
||||
return;
|
||||
|
||||
MemoryTrackerBlockerInThread blocker;
|
||||
LOG_TEST(&Poco::Logger::get("MemoryTracker"), "Too big allocation ({} bytes) without checking memory limits, "
|
||||
"it may lead to OOM. Stack trace: {}", size, StackTrace().toString());
|
||||
#else
|
||||
return; /// Avoid trash logging in release builds
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -235,7 +255,10 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
formatReadableSizeWithBinarySuffix(current_hard_limit));
|
||||
}
|
||||
else
|
||||
{
|
||||
memory_limit_exceeded_ignored = true;
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
Int64 limit_to_check = current_hard_limit;
|
||||
@ -303,7 +326,10 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
memory_limit_exceeded_ignored = true;
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
bool peak_updated = false;
|
||||
@ -323,6 +349,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT
|
||||
{
|
||||
bool log_memory_usage = false;
|
||||
peak_updated = updatePeak(will_be, log_memory_usage);
|
||||
debugLogBigAllocationWithoutCheck(size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -75,10 +75,14 @@
|
||||
M(S3GetRequestThrottlerSleepMicroseconds, "Total time a query was sleeping to conform S3 GET and SELECT request throttling.") \
|
||||
M(S3PutRequestThrottlerCount, "Number of S3 PUT, COPY, POST and LIST requests passed through throttler.") \
|
||||
M(S3PutRequestThrottlerSleepMicroseconds, "Total time a query was sleeping to conform S3 PUT, COPY, POST and LIST request throttling.") \
|
||||
M(RemoteReadThrottlerBytes, "Bytes passed through 'max_remote_read_network_bandwidth_for_server' throttler.") \
|
||||
M(RemoteReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_read_network_bandwidth_for_server' throttling.") \
|
||||
M(RemoteWriteThrottlerBytes, "Bytes passed through 'max_remote_write_network_bandwidth_for_server' throttler.") \
|
||||
M(RemoteWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_write_network_bandwidth_for_server' throttling.") \
|
||||
M(RemoteReadThrottlerBytes, "Bytes passed through 'max_remote_read_network_bandwidth_for_server'/'max_remote_read_network_bandwidth' throttler.") \
|
||||
M(RemoteReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_read_network_bandwidth_for_server'/'max_remote_read_network_bandwidth' throttling.") \
|
||||
M(RemoteWriteThrottlerBytes, "Bytes passed through 'max_remote_write_network_bandwidth_for_server'/'max_remote_write_network_bandwidth' throttler.") \
|
||||
M(RemoteWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_remote_write_network_bandwidth_for_server'/'max_remote_write_network_bandwidth' throttling.") \
|
||||
M(LocalReadThrottlerBytes, "Bytes passed through 'max_local_read_bandwidth_for_server'/'max_local_read_bandwidth' throttler.") \
|
||||
M(LocalReadThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_local_read_bandwidth_for_server'/'max_local_read_bandwidth' throttling.") \
|
||||
M(LocalWriteThrottlerBytes, "Bytes passed through 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttler.") \
|
||||
M(LocalWriteThrottlerSleepMicroseconds, "Total time a query was sleeping to conform 'max_local_write_bandwidth_for_server'/'max_local_write_bandwidth' throttling.") \
|
||||
M(ThrottlerSleepMicroseconds, "Total time a query was sleeping to conform all throttling settings.") \
|
||||
\
|
||||
M(QueryMaskingRulesMatch, "Number of times query masking rules was successfully matched.") \
|
||||
|
@ -94,8 +94,8 @@ public:
|
||||
void write(WriteBuffer & wb) const
|
||||
{
|
||||
writeBinary(key, wb);
|
||||
writeVarUInt(count, wb);
|
||||
writeVarUInt(error, wb);
|
||||
writeVarUIntOverflow(count, wb);
|
||||
writeVarUIntOverflow(error, wb);
|
||||
}
|
||||
|
||||
void read(ReadBuffer & rb)
|
||||
|
@ -18,9 +18,6 @@ namespace DB
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, typename ... U>
|
||||
concept is_any_of = (std::same_as<T, U> || ...);
|
||||
|
||||
|
||||
/** Checks type by comparing typeid.
|
||||
* The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful.
|
||||
|
@ -30,7 +30,7 @@
|
||||
|
||||
#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1
|
||||
|
||||
#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 1
|
||||
#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 2
|
||||
#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453
|
||||
|
||||
#define DBMS_MERGE_TREE_PART_INFO_VERSION 1
|
||||
|
@ -19,7 +19,10 @@ void ServerSettings::loadSettingsFromConfig(const Poco::Util::AbstractConfigurat
|
||||
"background_buffer_flush_schedule_pool_size",
|
||||
"background_schedule_pool_size",
|
||||
"background_message_broker_schedule_pool_size",
|
||||
"background_distributed_schedule_pool_size"
|
||||
"background_distributed_schedule_pool_size",
|
||||
|
||||
"max_remote_read_network_bandwidth_for_server",
|
||||
"max_remote_write_network_bandwidth_for_server",
|
||||
};
|
||||
|
||||
for (auto setting : all())
|
||||
|
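Annotation (not part of the patch): the ServerSettings hunk above adds a trailing comma after "background_distributed_schedule_pool_size" before appending new entries. Forgetting that comma would not fail to compile, because adjacent string literals concatenate and two names silently fuse into one. A small standalone illustration of that C++ behaviour:

```cpp
// Adjacent string literals are concatenated by the compiler, so a missing comma
// merges two list entries into one bogus name instead of producing an error.
#include <iostream>
#include <set>
#include <string>

int main()
{
    std::set<std::string> broken{
        "background_distributed_schedule_pool_size"   // missing comma!
        "max_remote_read_network_bandwidth_for_server",
        "max_remote_write_network_bandwidth_for_server",
    };
    std::cout << broken.size() << '\n';    // 2, not 3: the first two names were fused
    std::cout << *broken.begin() << '\n';  // prints the fused name
}
```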
@ -21,10 +21,15 @@ namespace DB
|
||||
M(UInt64, max_io_thread_pool_size, 100, "The maximum number of threads that would be used for IO operations", 0) \
|
||||
M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \
|
||||
M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \
|
||||
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_local_read_bandwidth_for_server, 0, "The maximum speed of local reads in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_local_write_bandwidth_for_server, 0, "The maximum speed of local writes in bytes per second. Zero means unlimited.", 0) \
|
||||
M(UInt64, max_backups_io_thread_pool_size, 1000, "The maximum number of threads that would be used for IO operations for BACKUP queries", 0) \
|
||||
M(UInt64, max_backups_io_thread_pool_free_size, 0, "Max free size for backups IO thread pool.", 0) \
|
||||
M(UInt64, backups_io_thread_pool_queue_size, 0, "Queue size for backups IO thread pool.", 0) \
|
||||
M(UInt64, backup_threads, 16, "The maximum number of threads to execute BACKUP requests.", 0) \
|
||||
M(UInt64, max_backup_bandwidth_for_server, 0, "The maximum read speed in bytes per second for all backups on server. Zero means unlimited.", 0) \
|
||||
M(UInt64, restore_threads, 16, "The maximum number of threads to execute RESTORE requests.", 0) \
|
||||
M(Int32, max_connections, 1024, "Max server connections.", 0) \
|
||||
M(UInt32, asynchronous_metrics_update_period_s, 1, "Period in seconds for updating asynchronous metrics.", 0) \
|
||||
|
@ -100,8 +100,10 @@ class IColumn;
|
||||
M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
|
||||
M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited. Only has meaning at server startup.", 0) \
|
||||
M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited. Only has meaning at server startup.", 0) \
|
||||
M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited. Only has meaning at server startup.", 0) \
|
||||
M(UInt64, max_remote_write_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for write. Zero means unlimited. Only has meaning at server startup.", 0) \
|
||||
M(UInt64, max_remote_read_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for read.", 0) \
|
||||
M(UInt64, max_remote_write_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for write.", 0) \
|
||||
M(UInt64, max_local_read_bandwidth, 0, "The maximum speed of local reads in bytes per second.", 0) \
|
||||
M(UInt64, max_local_write_bandwidth, 0, "The maximum speed of local writes in bytes per second.", 0) \
|
||||
M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
|
||||
M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \
|
||||
\
|
||||
@ -422,6 +424,7 @@ class IColumn;
|
||||
M(UInt64, backup_restore_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
|
||||
M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
|
||||
M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \
|
||||
M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \
|
||||
\
|
||||
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
|
||||
M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
|
||||
@ -464,6 +467,7 @@ class IColumn;
|
||||
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
|
||||
\
|
||||
M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
|
||||
M(Bool, formatdatetime_parsedatetime_m_is_month_name, true, "Formatter '%M' in function 'formatDateTime' produces the month name instead of minutes.", 0) \
|
||||
\
|
||||
M(UInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. This setting is a safety threshold, because using large number of partitions is a common misconception.", 0) \
|
||||
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.", 0) \
|
||||
@ -735,6 +739,7 @@ class IColumn;
|
||||
#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
|
||||
M(TYPE, NAME, DEFAULT, "Obsolete setting, does nothing.", BaseSettingsHelpers::Flags::OBSOLETE)
|
||||
|
||||
/// NOTE: ServerSettings::loadSettingsFromConfig() should be updated to include this settings
|
||||
#define MAKE_DEPRECATED_BY_SERVER_CONFIG(M, TYPE, NAME, DEFAULT) \
|
||||
M(TYPE, NAME, DEFAULT, "User-level setting is deprecated, and it must be defined in the server configuration instead.", BaseSettingsHelpers::Flags::OBSOLETE)
|
||||
|
||||
@ -768,6 +773,8 @@ class IColumn;
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_schedule_pool_size, 128) \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_message_broker_schedule_pool_size, 16) \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
|
||||
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
|
||||
/* ---- */ \
|
||||
MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \
|
||||
MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \
|
||||
|
@ -101,6 +101,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
|
||||
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
|
||||
{"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}},
|
||||
{"23.4", {{"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}},
|
||||
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
|
||||
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
|
||||
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "DiskLocal.h"
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <Common/createHardLink.h>
|
||||
#include "DiskFactory.h"
|
||||
|
||||
@ -367,10 +368,11 @@ std::unique_ptr<ReadBufferFromFileBase> DiskLocal::readFile(const String & path,
|
||||
}
|
||||
|
||||
std::unique_ptr<WriteBufferFromFileBase>
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings &)
|
||||
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, const WriteSettings & settings)
|
||||
{
|
||||
int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
|
||||
return std::make_unique<WriteBufferFromFile>(fs::path(disk_path) / path, buf_size, flags);
|
||||
return std::make_unique<WriteBufferFromFile>(
|
||||
fs::path(disk_path) / path, buf_size, flags, settings.local_throttler);
|
||||
}
|
||||
|
||||
void DiskLocal::removeFile(const String & path)
|
||||
|
@ -76,11 +76,25 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
|
||||
if (settings.local_fs_method == LocalFSReadMethod::read)
|
||||
{
|
||||
res = std::make_unique<ReadBufferFromFile>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
res = std::make_unique<ReadBufferFromFile>(
|
||||
filename,
|
||||
buffer_size,
|
||||
actual_flags,
|
||||
existing_memory,
|
||||
buffer_alignment,
|
||||
file_size,
|
||||
settings.local_throttler);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap)
|
||||
{
|
||||
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
res = std::make_unique<ReadBufferFromFilePReadWithDescriptorsCache>(
|
||||
filename,
|
||||
buffer_size,
|
||||
actual_flags,
|
||||
existing_memory,
|
||||
buffer_alignment,
|
||||
file_size,
|
||||
settings.local_throttler);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::io_uring)
|
||||
{
|
||||
@ -90,7 +104,15 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system");
|
||||
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
*reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
*reader,
|
||||
settings.priority,
|
||||
filename,
|
||||
buffer_size,
|
||||
actual_flags,
|
||||
existing_memory,
|
||||
buffer_alignment,
|
||||
file_size,
|
||||
settings.local_throttler);
|
||||
#else
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Read method io_uring is only supported in Linux");
|
||||
#endif
|
||||
@ -103,7 +125,15 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
|
||||
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER);
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
reader,
|
||||
settings.priority,
|
||||
filename,
|
||||
buffer_size,
|
||||
actual_flags,
|
||||
existing_memory,
|
||||
buffer_alignment,
|
||||
file_size,
|
||||
settings.local_throttler);
|
||||
}
|
||||
else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool)
|
||||
{
|
||||
@ -113,7 +143,15 @@ std::unique_ptr<ReadBufferFromFileBase> createReadBufferFromFileBase(
|
||||
|
||||
auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER);
|
||||
res = std::make_unique<AsynchronousReadBufferFromFileWithDescriptorsCache>(
|
||||
reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, buffer_alignment, file_size);
|
||||
reader,
|
||||
settings.priority,
|
||||
filename,
|
||||
buffer_size,
|
||||
actual_flags,
|
||||
existing_memory,
|
||||
buffer_alignment,
|
||||
file_size,
|
||||
settings.local_throttler);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method");
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
@ -264,23 +265,25 @@ static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & d
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!tuple_data_type->haveExplicitNames())
|
||||
bool have_explicit_names = tuple_data_type->haveExplicitNames();
|
||||
const auto & nested_names = tuple_data_type->getElementNames();
|
||||
for (uint32_t i = 0; i != nested_names.size(); ++i)
|
||||
{
|
||||
error_message += "Only named Tuple can be converted to CapnProto Struct";
|
||||
return false;
|
||||
}
|
||||
for (const auto & name : tuple_data_type->getElementNames())
|
||||
{
|
||||
KJ_IF_MAYBE(field, struct_schema.findFieldByName(name))
|
||||
if (have_explicit_names)
|
||||
{
|
||||
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message, name))
|
||||
KJ_IF_MAYBE (field, struct_schema.findFieldByName(nested_names[i]))
|
||||
{
|
||||
if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
error_message += "CapnProto struct doesn't contain a field with name " + nested_names[i];
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
error_message += "CapnProto struct doesn't contain a field with name " + name;
|
||||
else if (!checkCapnProtoType(struct_schema.getFields()[i].getType(), nested_types[tuple_data_type->getPositionByName(nested_names[i])], mode, error_message, nested_names[i]))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -307,41 +310,129 @@ static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & d
|
||||
return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message, column_name);
|
||||
}
|
||||
|
||||
static bool checkMapType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message)
|
||||
{
|
||||
/// We output/input Map type as the following CapnProto schema
|
||||
///
|
||||
/// struct Map {
|
||||
/// struct Entry {
|
||||
/// key @0: Key;
|
||||
/// value @1: Value;
|
||||
/// }
|
||||
/// entries @0 :List(Entry);
|
||||
/// }
|
||||
|
||||
if (!capnp_type.isStruct())
|
||||
return false;
|
||||
auto struct_schema = capnp_type.asStruct();
|
||||
|
||||
if (checkIfStructContainsUnnamedUnion(struct_schema))
|
||||
{
|
||||
error_message += "CapnProto struct contains unnamed union";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (struct_schema.getFields().size() != 1)
|
||||
{
|
||||
error_message += "CapnProto struct that represents Map type can contain only one field";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto & field_type = struct_schema.getFields()[0].getType();
|
||||
if (!field_type.isList())
|
||||
{
|
||||
error_message += "Field of CapnProto struct that represents Map is not a list";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto list_element_type = field_type.asList().getElementType();
|
||||
if (!list_element_type.isStruct())
|
||||
{
|
||||
error_message += "Field of CapnProto struct that represents Map is not a list of structs";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto key_value_struct = list_element_type.asStruct();
|
||||
if (checkIfStructContainsUnnamedUnion(key_value_struct))
|
||||
{
|
||||
error_message += "CapnProto struct contains unnamed union";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (key_value_struct.getFields().size() != 2)
|
||||
{
|
||||
error_message += "Key-value structure for Map struct should have exactly 2 fields";
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||
DataTypes types = {map_type.getKeyType(), map_type.getValueType()};
|
||||
Names names = {"key", "value"};
|
||||
|
||||
for (size_t i = 0; i != types.size(); ++i)
|
||||
{
|
||||
KJ_IF_MAYBE(field, key_value_struct.findFieldByName(names[i]))
|
||||
{
|
||||
if (!checkCapnProtoType(field->getType(), types[i], mode, error_message, names[i]))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
error_message += R"(Key-value structure for Map struct should have exactly 2 fields with names "key" and "value")";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool isCapnInteger(const capnp::Type & capnp_type)
|
||||
{
|
||||
return capnp_type.isInt8() || capnp_type.isUInt8() || capnp_type.isInt16() || capnp_type.isUInt16() || capnp_type.isInt32()
|
||||
|| capnp_type.isUInt32() || capnp_type.isInt64() || capnp_type.isUInt64();
|
||||
}
|
||||
|
||||
static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message, const String & column_name)
|
||||
{
|
||||
switch (data_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::UInt8:
|
||||
return capnp_type.isBool() || capnp_type.isUInt8();
|
||||
case TypeIndex::Date: [[fallthrough]];
|
||||
case TypeIndex::UInt16:
|
||||
return capnp_type.isBool() || isCapnInteger(capnp_type);
|
||||
case TypeIndex::Int8: [[fallthrough]];
|
||||
case TypeIndex::Int16: [[fallthrough]];
|
||||
case TypeIndex::UInt16: [[fallthrough]];
|
||||
case TypeIndex::Int32: [[fallthrough]];
|
||||
case TypeIndex::UInt32: [[fallthrough]];
|
||||
case TypeIndex::Int64: [[fallthrough]];
|
||||
case TypeIndex::UInt64:
|
||||
/// Allow integer conversions during input/output.
|
||||
return isCapnInteger(capnp_type);
|
||||
case TypeIndex::Date:
|
||||
return capnp_type.isUInt16();
|
||||
case TypeIndex::DateTime: [[fallthrough]];
|
||||
case TypeIndex::IPv4: [[fallthrough]];
|
||||
case TypeIndex::UInt32:
|
||||
case TypeIndex::IPv4:
|
||||
return capnp_type.isUInt32();
|
||||
case TypeIndex::UInt64:
|
||||
return capnp_type.isUInt64();
|
||||
case TypeIndex::Int8:
|
||||
return capnp_type.isInt8();
|
||||
case TypeIndex::Int16:
|
||||
return capnp_type.isInt16();
|
||||
case TypeIndex::Date32: [[fallthrough]];
|
||||
case TypeIndex::Decimal32: [[fallthrough]];
|
||||
case TypeIndex::Int32:
|
||||
return capnp_type.isInt32();
|
||||
case TypeIndex::Decimal32:
|
||||
return capnp_type.isInt32() || capnp_type.isUInt32();
|
||||
case TypeIndex::DateTime64: [[fallthrough]];
|
||||
case TypeIndex::Decimal64: [[fallthrough]];
|
||||
case TypeIndex::Int64:
|
||||
return capnp_type.isInt64();
|
||||
case TypeIndex::Float32:
|
||||
return capnp_type.isFloat32();
|
||||
case TypeIndex::Decimal64:
|
||||
return capnp_type.isInt64() || capnp_type.isUInt64();
|
||||
case TypeIndex::Float32:[[fallthrough]];
|
||||
case TypeIndex::Float64:
|
||||
return capnp_type.isFloat64();
|
||||
/// Allow converting between Float32 and Float64
|
||||
return capnp_type.isFloat32() || capnp_type.isFloat64();
|
||||
case TypeIndex::Enum8:
|
||||
return checkEnums<Int8>(capnp_type, data_type, mode, INT8_MAX, error_message);
|
||||
case TypeIndex::Enum16:
|
||||
return checkEnums<Int16>(capnp_type, data_type, mode, INT16_MAX, error_message);
|
||||
case TypeIndex::Int128: [[fallthrough]];
|
||||
case TypeIndex::UInt128: [[fallthrough]];
|
||||
case TypeIndex::Int256: [[fallthrough]];
|
||||
case TypeIndex::UInt256: [[fallthrough]];
|
||||
case TypeIndex::Decimal128: [[fallthrough]];
|
||||
case TypeIndex::Decimal256:
|
||||
return capnp_type.isData();
|
||||
case TypeIndex::Tuple:
|
||||
return checkTupleType(capnp_type, data_type, mode, error_message);
|
||||
case TypeIndex::Nullable:
|
||||
@ -359,6 +450,8 @@ static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr
|
||||
case TypeIndex::IPv6: [[fallthrough]];
|
||||
case TypeIndex::String:
|
||||
return capnp_type.isText() || capnp_type.isData();
|
||||
case TypeIndex::Map:
|
||||
return checkMapType(capnp_type, data_type, mode, error_message);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -41,8 +41,19 @@ public:
|
||||
return descriptor;
|
||||
|
||||
const auto * file_descriptor = importer.Import(schema_path);
|
||||
// If there are parsing errors, AddError() throws an exception and in this case the following line
|
||||
// isn't executed.
|
||||
if (error)
|
||||
{
|
||||
auto info = error.value();
|
||||
error.reset();
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA,
|
||||
"Cannot parse '{}' file, found an error at line {}, column {}, {}",
|
||||
info.filename,
|
||||
std::to_string(info.line),
|
||||
std::to_string(info.column),
|
||||
info.message);
|
||||
}
|
||||
|
||||
assert(file_descriptor);
|
||||
|
||||
if (with_envelope == WithEnvelope::No)
|
||||
@ -74,14 +85,24 @@ private:
|
||||
// Overrides google::protobuf::compiler::MultiFileErrorCollector:
|
||||
void AddError(const String & filename, int line, int column, const String & message) override
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA,
|
||||
"Cannot parse '{}' file, found an error at line {}, column {}, {}",
|
||||
filename, std::to_string(line), std::to_string(column), message);
|
||||
/// Protobuf library code is not exception safe, we should
|
||||
/// remember the error and throw it later from our side.
|
||||
error = ErrorInfo{filename, line, column, message};
|
||||
}
|
||||
|
||||
google::protobuf::compiler::DiskSourceTree disk_source_tree;
|
||||
google::protobuf::compiler::Importer importer;
|
||||
const WithEnvelope with_envelope;
|
||||
|
||||
struct ErrorInfo
|
||||
{
|
||||
String filename;
|
||||
int line;
|
||||
int column;
|
||||
String message;
|
||||
};
|
||||
|
||||
std::optional<ErrorInfo> error;
|
||||
};
|
||||
|
||||
|
||||
|
@ -3453,15 +3453,35 @@ namespace
|
||||
const auto & tuple_data_type = assert_cast<const DataTypeTuple &>(*data_type);
|
||||
size_t size_of_tuple = tuple_data_type.getElements().size();
|
||||
|
||||
if (tuple_data_type.haveExplicitNames() && field_descriptor.message_type())
|
||||
if (const auto * message_type = field_descriptor.message_type())
|
||||
{
|
||||
bool have_explicit_names = tuple_data_type.haveExplicitNames();
|
||||
Names element_names;
|
||||
if (have_explicit_names)
|
||||
{
|
||||
element_names = tuple_data_type.getElementNames();
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Match unnamed Tuple elements and Message fields by position.
|
||||
size_t field_count = message_type->field_count();
|
||||
if (field_count != size_of_tuple)
|
||||
throw Exception(
|
||||
ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS,
|
||||
"The number of fields in Protobuf message ({}) is not equal to the number of elements in unnamed Tuple ({})",
|
||||
field_count,
|
||||
size_of_tuple);
|
||||
for (size_t i = 0; i != field_count; ++i)
|
||||
element_names.push_back(message_type->field(static_cast<int>(i))->name());
|
||||
}
|
||||
|
||||
/// Try to serialize as a nested message.
|
||||
std::vector<size_t> used_column_indices;
|
||||
auto message_serializer = buildMessageSerializerImpl(
|
||||
size_of_tuple,
|
||||
tuple_data_type.getElementNames().data(),
|
||||
element_names.data(),
|
||||
tuple_data_type.getElements().data(),
|
||||
*field_descriptor.message_type(),
|
||||
*message_type,
|
||||
/* with_length_delimiter = */ false,
|
||||
google_wrappers_special_treatment,
|
||||
&field_descriptor,
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -13,16 +14,14 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionStringReplace : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringReplace>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -32,65 +31,80 @@ public:
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}",
|
||||
arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors args{
|
||||
{"haystack", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"},
|
||||
{"pattern", &isString<IDataType>, nullptr, "String"},
|
||||
{"replacement", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
if (!isStringOrFixedString(arguments[1]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of second argument of function {}",
|
||||
arguments[1]->getName(), getName());
|
||||
|
||||
if (!isStringOrFixedString(arguments[2]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of third argument of function {}",
|
||||
arguments[2]->getName(), getName());
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const ColumnPtr column_src = arguments[0].column;
|
||||
const ColumnPtr column_haystack = arguments[0].column;
|
||||
const ColumnPtr column_needle = arguments[1].column;
|
||||
const ColumnPtr column_replacement = arguments[2].column;
|
||||
|
||||
if (!isColumnConst(*column_needle) || !isColumnConst(*column_replacement))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"2nd and 3rd arguments of function {} must be constants.",
|
||||
getName());
|
||||
const ColumnString * col_haystack = checkAndGetColumn<ColumnString>(column_haystack.get());
|
||||
const ColumnFixedString * col_haystack_fixed = checkAndGetColumn<ColumnFixedString>(column_haystack.get());
|
||||
|
||||
const IColumn * c1 = arguments[1].column.get();
|
||||
const IColumn * c2 = arguments[2].column.get();
|
||||
const ColumnConst * c1_const = typeid_cast<const ColumnConst *>(c1);
|
||||
const ColumnConst * c2_const = typeid_cast<const ColumnConst *>(c2);
|
||||
String needle = c1_const->getValue<String>();
|
||||
String replacement = c2_const->getValue<String>();
|
||||
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(column_needle.get());
|
||||
const ColumnConst * col_needle_const = checkAndGetColumn<ColumnConst>(column_needle.get());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
|
||||
"Length of the second argument of function replace must be greater than 0.");
|
||||
const ColumnString * col_replacement_vector = checkAndGetColumn<ColumnString>(column_replacement.get());
|
||||
const ColumnConst * col_replacement_const = checkAndGetColumn<ColumnConst>(column_replacement.get());
|
||||
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_src.get()))
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
if (col_haystack && col_needle_const && col_replacement_const)
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
Impl::vector(col->getChars(), col->getOffsets(), needle, replacement, col_res->getChars(), col_res->getOffsets());
|
||||
Impl::vectorConstantConstant(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_src.get()))
|
||||
else if (col_haystack && col_needle_vector && col_replacement_const)
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), needle, replacement, col_res->getChars(), col_res->getOffsets());
|
||||
Impl::vectorVectorConstant(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack && col_needle_const && col_replacement_vector)
|
||||
{
|
||||
Impl::vectorConstantVector(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack && col_needle_vector && col_replacement_vector)
|
||||
{
|
||||
Impl::vectorVectorVector(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
|
||||
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack_fixed && col_needle_const && col_replacement_const)
|
||||
{
|
||||
Impl::vectorFixedConstantConstant(
|
||||
col_haystack_fixed->getChars(), col_haystack_fixed->getN(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
|
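Annotation (not part of the patch): the rewritten executeImpl above no longer requires constant needle and replacement arguments; it inspects whether each argument is a constant or a full column and dispatches to one of the new kernels (plus the FixedString variant). A minimal standalone sketch of that dispatch, with hypothetical types:

```cpp
// Sketch of the constant/vector dispatch: each of needle and replacement may be a
// constant or a per-row column, giving four string kernels to choose from.
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct ReplaceArgument
{
    std::optional<std::string> constant;  // set when the column is a ColumnConst
    std::vector<std::string> values;      // per-row values otherwise
};

const char * pickKernel(const ReplaceArgument & needle, const ReplaceArgument & replacement)
{
    if (needle.constant && replacement.constant)
        return "vectorConstantConstant";
    if (!needle.constant && replacement.constant)
        return "vectorVectorConstant";
    if (needle.constant && !replacement.constant)
        return "vectorConstantVector";
    return "vectorVectorVector";
}

int main()
{
    ReplaceArgument const_needle;
    const_needle.constant = "foo";
    ReplaceArgument row_replacements;
    row_replacements.values = {"x", "y"};
    std::cout << pickKernel(const_needle, row_replacements) << '\n'; // vectorConstantVector
}
```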
@ -41,6 +41,7 @@
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnStringHelpers.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/Concepts.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/AccurateComparison.h>
|
||||
|
File diff suppressed because it is too large
1623
src/Functions/FunctionsJSON.h
Normal file
1623
src/Functions/FunctionsJSON.h
Normal file
File diff suppressed because it is too large
@ -13,6 +13,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
@ -28,9 +29,11 @@ struct ReplaceRegexpTraits
|
||||
/** Replace all matches of regexp 'needle' to string 'replacement'. 'needle' and 'replacement' are constants.
|
||||
* 'replacement' can contain substitutions, for example: '\2-\3-\1'
|
||||
*/
|
||||
template <ReplaceRegexpTraits::Replace replace>
|
||||
template <typename Name, ReplaceRegexpTraits::Replace replace>
|
||||
struct ReplaceRegexpImpl
|
||||
{
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
/// If not negative, perform substitution of n-th subpattern from the regexp match.
|
||||
@ -162,18 +165,21 @@ struct ReplaceRegexpImpl
|
||||
++res_offset;
|
||||
}
|
||||
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
static void vectorConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
@ -182,39 +188,89 @@ struct ReplaceRegexpImpl
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t from = i > 0 ? offsets[i - 1] : 0;
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = static_cast<unsigned>(offsets[i] - from - 1);
|
||||
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - from - 1);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
static void vectorVectorConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == needle_offsets.size());
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t size = data.size() / n;
|
||||
res_data.reserve(data.size());
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstantVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == replacement_offsets.size());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
@ -223,22 +279,116 @@ struct ReplaceRegexpImpl
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
|
||||
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorVectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == needle_offsets.size());
|
||||
assert(needle_offsets.size() == replacement_offsets.size());
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixedConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t haystack_size = haystack_data.size() / n;
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t from = i * n;
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = n;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
|
||||
const size_t hs_length = n;
|
||||
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
struct ReplaceStringTraits
|
||||
{
|
||||
enum class Replace
|
||||
@ -16,27 +21,33 @@ struct ReplaceStringTraits
|
||||
All
|
||||
};
|
||||
};
|
||||
/** Replace one or all occurencies of substring 'needle' to 'replacement'. 'needle' and 'replacement' are constants.
|
||||
|
||||
/** Replace one or all occurrences of substring 'needle' with 'replacement'.
|
||||
*/
|
||||
template <ReplaceStringTraits::Replace replace>
|
||||
template <typename Name, ReplaceStringTraits::Replace replace>
|
||||
struct ReplaceStringImpl
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static void vectorConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const UInt8 * const begin = haystack_data.data();
|
||||
const UInt8 * const end = haystack_data.data() + haystack_data.size();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
/// The current index in the array of strings.
|
||||
size_t i = 0;
|
||||
@ -53,22 +64,22 @@ struct ReplaceStringImpl
|
||||
memcpy(&res_data[res_offset], pos, match - pos);
|
||||
|
||||
/// Determine which index it belongs to.
|
||||
while (i < offsets.size() && begin + offsets[i] <= match)
|
||||
while (i < haystack_offsets.size() && begin + haystack_offsets[i] <= match)
|
||||
{
|
||||
res_offsets[i] = res_offset + ((begin + offsets[i]) - pos);
|
||||
res_offsets[i] = res_offset + ((begin + haystack_offsets[i]) - pos);
|
||||
++i;
|
||||
}
|
||||
res_offset += (match - pos);
|
||||
|
||||
/// If you have reached the end, it's time to stop
|
||||
if (i == offsets.size())
|
||||
if (i == haystack_offsets.size())
|
||||
break;
|
||||
|
||||
/// Is it true that this string no longer needs to perform transformations.
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
/// We check that the entry does not go through the boundaries of strings.
|
||||
if (match + needle.size() < begin + offsets[i])
|
||||
if (match + needle.size() < begin + haystack_offsets[i])
|
||||
{
|
||||
res_data.resize(res_data.size() + replacement.size());
|
||||
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
|
||||
@ -85,34 +96,268 @@ struct ReplaceStringImpl
|
||||
|
||||
if (can_finish_current_string)
|
||||
{
|
||||
res_data.resize(res_data.size() + (begin + offsets[i] - pos));
|
||||
memcpy(&res_data[res_offset], pos, (begin + offsets[i] - pos));
|
||||
res_offset += (begin + offsets[i] - pos);
|
||||
res_data.resize(res_data.size() + (begin + haystack_offsets[i] - pos));
|
||||
memcpy(&res_data[res_offset], pos, (begin + haystack_offsets[i] - pos));
|
||||
res_offset += (begin + haystack_offsets[i] - pos);
|
||||
res_offsets[i] = res_offset;
|
||||
pos = begin + offsets[i];
|
||||
pos = begin + haystack_offsets[i];
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: this function converts fixed-length strings to variable-length strings
|
||||
/// and each variable-length string should ends with zero byte.
|
||||
static void vectorFixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
template <typename CharT>
|
||||
requires (sizeof(CharT) == 1)
|
||||
static void copyToOutput(
|
||||
const CharT * what_start, size_t what_size,
|
||||
ColumnString::Chars & output, ColumnString::Offset & output_offset)
|
||||
{
|
||||
output.resize(output.size() + what_size);
|
||||
memcpy(&output[output_offset], what_start, what_size);
|
||||
output_offset += what_size;
|
||||
}
|
||||
|
||||
static void vectorVectorConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
chassert(haystack_offsets.size() == needle_offsets.size());
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t count = data.size() / n;
|
||||
res_data.reserve(data.size());
|
||||
res_offsets.resize(count);
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needle_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_needle_data = &needle_data[prev_needle_offset];
|
||||
const size_t cur_needle_length = needle_offsets[i] - prev_needle_offset - 1;
|
||||
|
||||
if (cur_needle_length == 0)
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length);
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(replacement.data(), replacement.size(), res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + cur_needle_length;
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - cur_needle_length + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needle_offset = needle_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstantVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
chassert(haystack_offsets.size() == replacement_offsets.size());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_replacement_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_replacement_data = &replacement_data[prev_replacement_offset];
|
||||
const size_t cur_replacement_length = replacement_offsets[i] - prev_replacement_offset - 1;
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky just to keep things simple
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(needle.data(), needle.size());
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + needle.size();
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - needle.size() + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_replacement_offset = replacement_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorVectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
chassert(haystack_offsets.size() == needle_offsets.size());
|
||||
chassert(needle_offsets.size() == replacement_offsets.size());
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needle_offset = 0;
|
||||
size_t prev_replacement_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_needle_data = &needle_data[prev_needle_offset];
|
||||
const size_t cur_needle_length = needle_offsets[i] - prev_needle_offset - 1;
|
||||
|
||||
const auto * const cur_replacement_data = &replacement_data[prev_replacement_offset];
|
||||
const size_t cur_replacement_length = replacement_offsets[i] - prev_replacement_offset - 1;
|
||||
|
||||
if (cur_needle_length == 0)
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length);
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + cur_needle_length;
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - cur_needle_length + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needle_offset = needle_offsets[i];
|
||||
prev_replacement_offset = replacement_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: this function converts fixed-length strings to variable-length strings
|
||||
/// and each variable-length string should ends with zero byte.
|
||||
static void vectorFixedConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const UInt8 * const begin = haystack_data.data();
|
||||
const UInt8 * const end = haystack_data.data() + haystack_data.size();
|
||||
const UInt8 * pos = begin;
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t haystack_size = haystack_data.size() / n;
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
/// The current index in the string array.
|
||||
size_t i = 0;
|
||||
@ -139,13 +384,13 @@ struct ReplaceStringImpl
|
||||
|
||||
/// Copy skipped strings without any changes but
|
||||
/// add zero byte to the end of each string.
|
||||
while (i < count && begin + n * (i + 1) <= match)
|
||||
while (i < haystack_size && begin + n * (i + 1) <= match)
|
||||
{
|
||||
COPY_REST_OF_CURRENT_STRING();
|
||||
}
|
||||
|
||||
/// If you have reached the end, it's time to stop
|
||||
if (i == count)
|
||||
if (i == haystack_size)
|
||||
break;
|
||||
|
||||
/// Copy unchanged part of current string.
|
||||
|
File diff suppressed because it is too large
@ -48,7 +48,7 @@ namespace
const std::unordered_map<String, std::pair<String, Int32>> monthMap{
{"jan", {"uary", 1}},
{"feb", {"ruary", 2}},
{"mar", {"rch", 3}},
{"mar", {"ch", 3}},
{"apr", {"il", 4}},
{"may", {"", 5}},
{"jun", {"e", 6}},
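Each monthMap entry pairs a three-letter prefix with the expected remainder of the month name, so the old {"mar", {"rch", 3}} entry could only ever match the misspelling "Marrch"; the hunk corrects the remainder to "ch". Below is a minimal sketch of how such a prefix/remainder table is used to parse a full month name, in the spirit of the mysqlMonthOfYearTextLong parser that appears later in this diff; the function name and the abbreviated map here are illustrative only.

```cpp
// Illustrative sketch (not the ClickHouse implementation): parsing a full
// month name with a "3-letter prefix -> (remainder, month number)" table,
// the same shape as monthMap above. Only a few entries are shown.
#include <cctype>
#include <optional>
#include <string>
#include <unordered_map>
#include <utility>

static std::optional<int> parseFullMonthName(std::string text)
{
    static const std::unordered_map<std::string, std::pair<std::string, int>> month_map{
        {"jan", {"uary", 1}}, {"feb", {"ruary", 2}}, {"mar", {"ch", 3}}, {"may", {"", 5}}};

    for (char & c : text)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));

    if (text.size() < 3)
        return std::nullopt;

    auto it = month_map.find(text.substr(0, 3));
    if (it == month_map.end() || text.substr(3) != it->second.first)
        return std::nullopt; /// e.g. "Marrch" no longer matches after the fix

    return it->second.second;
}
// parseFullMonthName("March") == 3, parseFullMonthName("Marrch") == std::nullopt
```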
@ -101,16 +101,16 @@ namespace
bool is_year_of_era = false; /// If true, year is calculated from era and year of era, the latter cannot be zero or negative.
bool has_year = false; /// Whether year was explicitly specified.

/// If is_clock_hour = true, is_hour_of_half_day = true, hour's range is [1, 12]
/// If is_clock_hour = true, is_hour_of_half_day = false, hour's range is [1, 24]
/// If is_clock_hour = false, is_hour_of_half_day = true, hour's range is [0, 11]
/// If is_clock_hour = false, is_hour_of_half_day = false, hour's range is [0, 23]
/// If hour_starts_at_1 = true, is_hour_of_half_day = true, hour's range is [1, 12]
/// If hour_starts_at_1 = true, is_hour_of_half_day = false, hour's range is [1, 24]
/// If hour_starts_at_1 = false, is_hour_of_half_day = true, hour's range is [0, 11]
/// If hour_starts_at_1 = false, is_hour_of_half_day = false, hour's range is [0, 23]
Int32 hour = 0;
Int32 minute = 0; /// range [0, 59]
Int32 second = 0; /// range [0, 59]

bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime
bool is_clock_hour = false; /// Whether the hour is clockhour
bool hour_starts_at_1 = false; /// Whether the hour is clockhour
bool is_hour_of_half_day = false; /// Whether the hour is of half day

bool has_time_zone_offset = false; /// If true, time zone offset is explicitly specified.
@ -137,7 +137,7 @@ namespace
second = 0;

is_am = true;
is_clock_hour = false;
hour_starts_at_1 = false;
is_hour_of_half_day = false;

has_time_zone_offset = false;
@ -275,23 +275,23 @@ namespace
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Unknown half day of day: {}", text);
}

void setHour(Int32 hour_, bool is_hour_of_half_day_ = false, bool is_clock_hour_ = false)
void setHour(Int32 hour_, bool is_hour_of_half_day_ = false, bool hour_starts_at_1_ = false)
{
Int32 max_hour;
Int32 min_hour;
Int32 new_hour = hour_;
if (!is_hour_of_half_day_ && !is_clock_hour_)
if (!is_hour_of_half_day_ && !hour_starts_at_1_)
{
max_hour = 23;
min_hour = 0;
}
else if (!is_hour_of_half_day_ && is_clock_hour_)
else if (!is_hour_of_half_day_ && hour_starts_at_1_)
{
max_hour = 24;
min_hour = 1;
new_hour = hour_ % 24;
}
else if (is_hour_of_half_day_ && !is_clock_hour_)
else if (is_hour_of_half_day_ && !hour_starts_at_1_)
{
max_hour = 11;
min_hour = 0;
@ -306,16 +306,16 @@ namespace
if (hour_ < min_hour || hour_ > max_hour)
throw Exception(
ErrorCodes::CANNOT_PARSE_DATETIME,
"Value {} for hour must be in the range [{}, {}] if_hour_of_half_day={} and is_clock_hour={}",
"Value {} for hour must be in the range [{}, {}] if_hour_of_half_day={} and hour_starts_at_1={}",
hour,
max_hour,
min_hour,
is_hour_of_half_day_,
is_clock_hour_);
hour_starts_at_1_);

hour = new_hour;
is_hour_of_half_day = is_hour_of_half_day_;
is_clock_hour = is_clock_hour_;
hour_starts_at_1 = hour_starts_at_1_;
}
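setHour() above validates the parsed hour against one of four ranges selected by is_hour_of_half_day and the renamed hour_starts_at_1 flag, and normalises it into [0, 23] (the AM/PM adjustment happens elsewhere). The sketch below restates that mapping as a standalone function; the fourth branch (half-day hours starting at 1) falls outside the visible hunk, so its [1, 12] range and `% 12` normalisation are an assumption taken from the comment block earlier in this file.

```cpp
// Illustrative sketch (not the ClickHouse code): validating and normalising an
// hour value according to the two flags used by setHour() above. The
// half-day + starts-at-1 branch is not visible in the hunk, so treating it as
// range [1, 12] with `% 12` is an assumption based on the comments.
#include <cstdint>
#include <stdexcept>

static int32_t normalizeHour(int32_t hour, bool is_hour_of_half_day, bool hour_starts_at_1)
{
    int32_t min_hour;
    int32_t max_hour;
    int32_t new_hour = hour;

    if (!is_hour_of_half_day && !hour_starts_at_1)     { min_hour = 0; max_hour = 23; }
    else if (!is_hour_of_half_day && hour_starts_at_1) { min_hour = 1; max_hour = 24; new_hour = hour % 24; }
    else if (is_hour_of_half_day && !hour_starts_at_1) { min_hour = 0; max_hour = 11; }
    else /* half day, starts at 1 (assumed) */         { min_hour = 1; max_hour = 12; new_hour = hour % 12; }

    if (hour < min_hour || hour > max_hour)
        throw std::out_of_range("hour out of range for the given flags");
    return new_hour;
}
// normalizeHour(24, false, true) == 0, normalizeHour(12, true, true) == 0
```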
void setMinute(Int32 minute_)
|
||||
@ -464,8 +464,15 @@ namespace
|
||||
class FunctionParseDateTimeImpl : public IFunction
|
||||
{
|
||||
public:
|
||||
const bool mysql_M_is_month_name;
|
||||
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionParseDateTimeImpl>(); }
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionParseDateTimeImpl>(context); }
|
||||
|
||||
explicit FunctionParseDateTimeImpl(ContextPtr context)
|
||||
: mysql_M_is_month_name(context->getSettings().formatdatetime_parsedatetime_m_is_month_name)
|
||||
{
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
@ -768,6 +775,38 @@ namespace
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlMonthOfYearTextLong(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
checkSpace(cur, end, 3, "mysqlMonthOfYearTextLong requires size >= 3", fragment);
|
||||
String text1(cur, 3);
|
||||
boost::to_lower(text1);
|
||||
auto it = monthMap.find(text1);
|
||||
if (it == monthMap.end())
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_DATETIME,
|
||||
"Unable to parse first part of fragment {} from {} because of unknown month of year text: {}",
|
||||
fragment,
|
||||
std::string_view(cur, end - cur),
|
||||
text1);
|
||||
cur += 3;
|
||||
|
||||
size_t expected_remaining_size = it->second.first.size();
|
||||
checkSpace(cur, end, expected_remaining_size, "mysqlMonthOfYearTextLong requires the second parg size >= " + std::to_string(expected_remaining_size), fragment);
|
||||
String text2(cur, expected_remaining_size);
|
||||
boost::to_lower(text2);
|
||||
if (text2 != it->second.first)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_PARSE_DATETIME,
|
||||
"Unable to parse second part of fragment {} from {} because of unknown month of year text: {}",
|
||||
fragment,
|
||||
std::string_view(cur, end - cur),
|
||||
text1 + text2);
|
||||
cur += expected_remaining_size;
|
||||
|
||||
date.setMonth(it->second.second);
|
||||
return cur;
|
||||
}
|
||||
|
||||
static Pos mysqlMonth(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
Int32 month;
|
||||
@ -900,7 +939,7 @@ namespace
|
||||
|
||||
static Pos mysqlDayOfWeekTextLong(Pos cur, Pos end, const String & fragment, DateTime & date)
|
||||
{
|
||||
checkSpace(cur, end, 6, "jodaDayOfWeekText requires size >= 6", fragment);
|
||||
checkSpace(cur, end, 6, "mysqlDayOfWeekTextLong requires size >= 6", fragment);
|
||||
String text1(cur, 3);
|
||||
boost::to_lower(text1);
|
||||
auto it = dayOfWeekMap.find(text1);
|
||||
@ -914,7 +953,7 @@ namespace
|
||||
cur += 3;
|
||||
|
||||
size_t expected_remaining_size = it->second.first.size();
|
||||
checkSpace(cur, end, expected_remaining_size, "jodaDayOfWeekText requires the second parg size >= " + std::to_string(expected_remaining_size), fragment);
|
||||
checkSpace(cur, end, expected_remaining_size, "mysqlDayOfWeekTextLong requires the second parg size >= " + std::to_string(expected_remaining_size), fragment);
|
||||
String text2(cur, expected_remaining_size);
|
||||
boost::to_lower(text2);
|
||||
if (text2 != it->second.first)
|
||||
@ -1512,9 +1551,14 @@ namespace
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlTimezoneOffset));
break;

// Minute (00-59)
// Depending on a setting
// - Full month [January...December]
// - Minute (00-59) OR
case 'M':
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMinute));
if (mysql_M_is_month_name)
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthOfYearTextLong));
else
instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMinute));
break;

// AM or PM
@ -13,7 +13,7 @@ struct NameReplaceAll
|
||||
static constexpr auto name = "replaceAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::All>, NameReplaceAll>;
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<NameReplaceAll, ReplaceStringTraits::Replace::All>, NameReplaceAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceOne
|
||||
static constexpr auto name = "replaceOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::First>, NameReplaceOne>;
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<NameReplaceOne, ReplaceStringTraits::Replace::First>, NameReplaceOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpAll
|
||||
static constexpr auto name = "replaceRegexpAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::All>, NameReplaceRegexpAll>;
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<NameReplaceRegexpAll, ReplaceRegexpTraits::Replace::All>, NameReplaceRegexpAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpOne
|
||||
static constexpr auto name = "replaceRegexpOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::First>, NameReplaceRegexpOne>;
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<NameReplaceRegexpOne, ReplaceRegexpTraits::Replace::First>, NameReplaceRegexpOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/Concepts.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Common/register_objects.h>
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <IO/AsynchronousReadBufferFromFileDescriptor.h>
|
||||
#include <IO/OpenedFileCache.h>
|
||||
|
||||
@ -7,6 +8,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/* NOTE: Unused */
|
||||
class AsynchronousReadBufferFromFile : public AsynchronousReadBufferFromFileDescriptor
|
||||
{
|
||||
protected:
|
||||
@ -62,8 +64,9 @@ public:
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_)
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler_ = {})
|
||||
: AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_, throttler_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
file = OpenedFileCache::instance().get(file_name, flags);
|
||||
|
@ -5,14 +5,17 @@
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Throttler.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <IO/AsynchronousReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event AsynchronousReadWaitMicroseconds;
|
||||
extern const Event LocalReadThrottlerBytes;
|
||||
extern const Event LocalReadThrottlerSleepMicroseconds;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -92,6 +95,8 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl()
|
||||
|
||||
assert(offset <= size);
|
||||
size_t bytes_read = size - offset;
|
||||
if (throttler)
|
||||
throttler->add(bytes_read, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds);
|
||||
|
||||
if (bytes_read)
|
||||
{
|
||||
@ -117,6 +122,8 @@ bool AsynchronousReadBufferFromFileDescriptor::nextImpl()
|
||||
|
||||
assert(offset <= size);
|
||||
size_t bytes_read = size - offset;
|
||||
if (throttler)
|
||||
throttler->add(bytes_read, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds);
|
||||
|
||||
if (bytes_read)
|
||||
{
|
||||
@ -149,12 +156,14 @@ AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescript
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
std::optional<size_t> file_size_,
|
||||
ThrottlerPtr throttler_)
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
|
||||
, reader(reader_)
|
||||
, base_priority(priority_)
|
||||
, required_alignment(alignment)
|
||||
, fd(fd_)
|
||||
, throttler(throttler_)
|
||||
{
|
||||
if (required_alignment > buf_size)
|
||||
throw Exception(
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <IO/AsynchronousReader.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
|
||||
#include <optional>
|
||||
#include <unistd.h>
|
||||
@ -26,6 +27,7 @@ protected:
|
||||
size_t file_offset_of_buffer_end = 0; /// What offset in file corresponds to working_buffer.end().
|
||||
size_t bytes_to_ignore = 0; /// How many bytes should we ignore upon a new read request.
|
||||
int fd;
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
bool nextImpl() override;
|
||||
|
||||
@ -42,7 +44,8 @@ public:
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler_ = {});
|
||||
|
||||
~AsynchronousReadBufferFromFileDescriptor() override;
|
||||
|
||||
|
@ -30,8 +30,10 @@ ReadBufferFromFile::ReadBufferFromFile(
|
||||
int flags,
|
||||
char * existing_memory,
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_size_), file_name(file_name_)
|
||||
std::optional<size_t> file_size_,
|
||||
ThrottlerPtr throttler_)
|
||||
: ReadBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_size_, throttler_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::FileOpen);
|
||||
|
||||
@ -61,8 +63,9 @@ ReadBufferFromFile::ReadBufferFromFile(
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment,
|
||||
std::optional<size_t> file_size_)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
|
||||
std::optional<size_t> file_size_,
|
||||
ThrottlerPtr throttler_)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_, throttler_)
|
||||
, file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
|
||||
{
|
||||
fd_ = -1;
|
||||
|
@ -29,7 +29,8 @@ public:
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler = {});
|
||||
|
||||
/// Use pre-opened file descriptor.
|
||||
explicit ReadBufferFromFile(
|
||||
@ -38,7 +39,8 @@ public:
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt);
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler = {});
|
||||
|
||||
~ReadBufferFromFile() override;
|
||||
|
||||
@ -88,8 +90,9 @@ public:
|
||||
int flags = -1,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment, file_size_)
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler_ = {})
|
||||
: ReadBufferFromFileDescriptorPRead(-1, buf_size, existing_memory, alignment, file_size_, throttler_)
|
||||
, file_name(file_name_)
|
||||
{
|
||||
file = OpenedFileCache::instance().get(file_name, flags);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Throttler.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
@ -21,6 +22,8 @@ namespace ProfileEvents
|
||||
extern const Event ReadBufferFromFileDescriptorReadBytes;
|
||||
extern const Event DiskReadElapsedMicroseconds;
|
||||
extern const Event Seek;
|
||||
extern const Event LocalReadThrottlerBytes;
|
||||
extern const Event LocalReadThrottlerSleepMicroseconds;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -82,7 +85,12 @@ bool ReadBufferFromFileDescriptor::nextImpl()
|
||||
}
|
||||
|
||||
if (res > 0)
|
||||
{
|
||||
bytes_read += res;
|
||||
if (throttler)
|
||||
throttler->add(res, ProfileEvents::LocalReadThrottlerBytes, ProfileEvents::LocalReadThrottlerSleepMicroseconds);
|
||||
}
|
||||
|
||||
|
||||
/// It reports real time spent including the time spent while thread was preempted doing nothing.
|
||||
/// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <IO/ReadBufferFromFileBase.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
@ -21,6 +22,8 @@ protected:
|
||||
|
||||
int fd;
|
||||
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
bool nextImpl() override;
|
||||
void prefetch(int64_t priority) override;
|
||||
|
||||
@ -33,10 +36,12 @@ public:
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler_ = {})
|
||||
: ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_)
|
||||
, required_alignment(alignment)
|
||||
, fd(fd_)
|
||||
, throttler(throttler_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -78,8 +83,9 @@ public:
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0,
|
||||
std::optional<size_t> file_size_ = std::nullopt)
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_)
|
||||
std::optional<size_t> file_size_ = std::nullopt,
|
||||
ThrottlerPtr throttler_ = {})
|
||||
: ReadBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, file_size_, throttler_)
|
||||
{
|
||||
use_pread = true;
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ struct ReadSettings
|
||||
size_t mmap_threshold = 0;
|
||||
MMappedFileCache * mmap_cache = nullptr;
|
||||
|
||||
/// For 'pread_threadpool' method. Lower is more priority.
|
||||
/// For 'pread_threadpool'/'io_uring' method. Lower is more priority.
|
||||
size_t priority = 0;
|
||||
|
||||
bool load_marks_asynchronously = true;
|
||||
@ -109,6 +109,7 @@ struct ReadSettings
|
||||
|
||||
/// Bandwidth throttler to use during reading
|
||||
ThrottlerPtr remote_throttler;
|
||||
ThrottlerPtr local_throttler;
|
||||
|
||||
// Resource to be used during reading
|
||||
ResourceLink resource_link;
|
||||
|
@ -15,25 +15,19 @@ namespace ErrorCodes
}

/** Variable-Length Quantity (VLQ) Base-128 compression
*
* NOTE: Due to historical reasons, only up to 1<<63-1 are supported, which
* cannot be changed without breaking the backward compatibility.
* Also some drivers may support full 1<<64 range (i.e. python -
* clickhouse-driver), while others has the same limitations as ClickHouse
* (i.e. Rust - clickhouse-rs).
* So implementing VLQ for the whole 1<<64 range will require different set of
* helpers.
*/
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;
/// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding.

/** Write UInt64 in variable length format (base128) */
/// Write UInt64 in variable length format (base128)
void writeVarUInt(UInt64 x, std::ostream & ostr);
void writeVarUInt(UInt64 x, WriteBuffer & ostr);
char * writeVarUInt(UInt64 x, char * ostr);

/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under
/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the
/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python))
constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1;

/** Write UInt64 in variable length format, wrap the value to VAR_UINT_MAX if it exceed VAR_UINT_MAX (to bypass sanity check) */
/// Write UInt64 in variable length format (base128), limit the value to VAR_UINT_MAX if it exceed VAR_UINT_MAX (to bypass sanity check)
template <typename ...Args>
auto writeVarUIntOverflow(UInt64 x, Args && ... args)
{
@ -41,20 +35,20 @@ auto writeVarUIntOverflow(UInt64 x, Args && ... args)
}

/** Read UInt64, written in variable length format (base128) */
/// Read UInt64, written in variable length format (base128)
void readVarUInt(UInt64 & x, std::istream & istr);
void readVarUInt(UInt64 & x, ReadBuffer & istr);
const char * readVarUInt(UInt64 & x, const char * istr, size_t size);

/** Get the length of UInt64 in VarUInt format */
/// Get the length of UInt64 in VarUInt format
size_t getLengthOfVarUInt(UInt64 x);

/** Get the Int64 length in VarInt format */
/// Get the Int64 length in VarInt format
size_t getLengthOfVarInt(Int64 x);

/** Write Int64 in variable length format (base128) */
/// Write Int64 in variable length format (base128)
template <typename OUT>
inline void writeVarInt(Int64 x, OUT & ostr)
{
@ -67,7 +61,7 @@ inline char * writeVarInt(Int64 x, char * ostr)
}

/** Read Int64, written in variable length format (base128) */
/// Read Int64, written in variable length format (base128)
template <typename IN>
inline void readVarInt(Int64 & x, IN & istr)
{
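The rewritten comments above describe the VLQ/varint wire format: 7 data bits per byte with the high bit as a continuation flag, and a practical ceiling of VAR_UINT_MAX = (1ULL << 63) - 1 because at most 9 bytes are ever read or written. For reference, a self-contained sketch of that encoding follows; it is not the VarInt.h implementation (which works on WriteBuffer/ReadBuffer and adds sanity checks), and the 9-byte cap is exactly what makes values above 2^63 - 1 fail to round-trip.

```cpp
// Minimal, self-contained sketch of base-128 (VLQ / varint) coding for 64-bit
// values, for reference only. The real functions in VarInt.h integrate with
// Read/WriteBuffer and enforce the VAR_UINT_MAX = (1ULL << 63) - 1 limit.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

static void writeVarUIntSketch(uint64_t x, std::vector<uint8_t> & out)
{
    for (size_t i = 0; i < 9; ++i)   /// at most 9 bytes; the 64th bit is lost, hence the 1<<63-1 limit
    {
        uint8_t byte = x & 0x7F;     /// low 7 bits of the value
        x >>= 7;
        if (x)
            byte |= 0x80;            /// continuation bit: more bytes follow
        out.push_back(byte);
        if (!x)
            return;
    }
}

static uint64_t readVarUIntSketch(const uint8_t * in, size_t size)
{
    uint64_t x = 0;
    for (size_t i = 0; i < 9 && i < size; ++i)
    {
        x |= static_cast<uint64_t>(in[i] & 0x7F) << (7 * i);
        if (!(in[i] & 0x80))
            break;
    }
    return x;
}

int main()
{
    std::vector<uint8_t> buf;
    writeVarUIntSketch(300, buf);    /// encodes as 0xAC 0x02
    assert(readVarUIntSketch(buf.data(), buf.size()) == 300);
}
```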
@ -29,10 +29,11 @@ WriteBufferFromFile::WriteBufferFromFile(
|
||||
const std::string & file_name_,
|
||||
size_t buf_size,
|
||||
int flags,
|
||||
ThrottlerPtr throttler_,
|
||||
mode_t mode,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
: WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_name_)
|
||||
: WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, throttler_, alignment, file_name_)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::FileOpen);
|
||||
|
||||
@ -63,9 +64,10 @@ WriteBufferFromFile::WriteBufferFromFile(
|
||||
int & fd_,
|
||||
const std::string & original_file_name,
|
||||
size_t buf_size,
|
||||
ThrottlerPtr throttler_,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
: WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, original_file_name)
|
||||
: WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, throttler_, alignment, original_file_name)
|
||||
{
|
||||
fd_ = -1;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
|
||||
|
||||
@ -32,6 +33,7 @@ public:
|
||||
const std::string & file_name_,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
int flags = -1,
|
||||
ThrottlerPtr throttler_ = {},
|
||||
mode_t mode = 0666,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
@ -41,6 +43,7 @@ public:
|
||||
int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object.
|
||||
const std::string & original_file_name = {},
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
ThrottlerPtr throttler_ = {},
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <cassert>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <Common/Throttler.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
@ -20,6 +21,8 @@ namespace ProfileEvents
|
||||
extern const Event DiskWriteElapsedMicroseconds;
|
||||
extern const Event FileSync;
|
||||
extern const Event FileSyncElapsedMicroseconds;
|
||||
extern const Event LocalWriteThrottlerBytes;
|
||||
extern const Event LocalWriteThrottlerSleepMicroseconds;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
@ -71,7 +74,11 @@ void WriteBufferFromFileDescriptor::nextImpl()
|
||||
}
|
||||
|
||||
if (res > 0)
|
||||
{
|
||||
bytes_written += res;
|
||||
if (throttler)
|
||||
throttler->add(res, ProfileEvents::LocalWriteThrottlerBytes, ProfileEvents::LocalWriteThrottlerSleepMicroseconds);
|
||||
}
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::DiskWriteElapsedMicroseconds, watch.elapsedMicroseconds());
|
||||
@ -85,10 +92,12 @@ WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
|
||||
int fd_,
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
ThrottlerPtr throttler_,
|
||||
size_t alignment,
|
||||
std::string file_name_)
|
||||
: WriteBufferFromFileBase(buf_size, existing_memory, alignment)
|
||||
, fd(fd_)
|
||||
, throttler(throttler_)
|
||||
, file_name(std::move(file_name_))
|
||||
{
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteBufferFromFileBase.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -15,6 +16,7 @@ public:
|
||||
int fd_ = -1,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
ThrottlerPtr throttler_ = {},
|
||||
size_t alignment = 0,
|
||||
std::string file_name_ = "");
|
||||
|
||||
@ -49,6 +51,7 @@ protected:
|
||||
void nextImpl() override;
|
||||
|
||||
int fd;
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
/// If file has name contains filename, otherwise contains string "(fd=...)"
|
||||
std::string file_name;
|
||||
|
@ -14,7 +14,7 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
WriteBufferFromTemporaryFile::WriteBufferFromTemporaryFile(std::unique_ptr<PocoTemporaryFile> && tmp_file_)
|
||||
: WriteBufferFromFile(tmp_file_->path(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, 0600), tmp_file(std::move(tmp_file_))
|
||||
: WriteBufferFromFile(tmp_file_->path(), DBMS_DEFAULT_BUFFER_SIZE, O_RDWR | O_TRUNC | O_CREAT, /* throttler= */ {}, 0600), tmp_file(std::move(tmp_file_))
|
||||
{}
|
||||
|
||||
|
||||
|
@ -11,6 +11,7 @@ struct WriteSettings
|
||||
{
|
||||
/// Bandwidth throttler to use during writing
|
||||
ThrottlerPtr remote_throttler;
|
||||
ThrottlerPtr local_throttler;
|
||||
|
||||
// Resource to be used during reading
|
||||
ResourceLink resource_link;
|
||||
|
@ -46,7 +46,7 @@ int main(int, char **)
|
||||
/// Write to file with O_DIRECT, read as usual.
|
||||
|
||||
{
|
||||
WriteBufferFromFile wb("test2", BUF_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666, nullptr, page_size);
|
||||
WriteBufferFromFile wb("test2", BUF_SIZE, O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, /* throttler= */ {}, 0666, nullptr, page_size);
|
||||
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
writeStringBinary(test, wb);
|
||||
|
@ -275,7 +275,9 @@ void executeQueryWithParallelReplicas(
|
||||
auto shard_info = not_optimized_cluster->getShardsInfo().front();
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
auto all_replicas_count = std::min(static_cast<size_t>(settings.max_parallel_replicas), shard_info.all_addresses.size());
|
||||
ClusterPtr new_cluster = not_optimized_cluster->getClusterWithReplicasAsShards(settings);
|
||||
|
||||
auto all_replicas_count = std::min(static_cast<size_t>(settings.max_parallel_replicas), new_cluster->getShardCount());
|
||||
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(all_replicas_count);
|
||||
auto remote_plan = std::make_unique<QueryPlan>();
|
||||
auto plans = std::vector<QueryPlanPtr>();
|
||||
@ -287,35 +289,13 @@ void executeQueryWithParallelReplicas(
|
||||
/// to then tell it about the reading method we chose.
|
||||
query_info.coordinator = coordinator;
|
||||
|
||||
UUID parallel_group_id = UUIDHelpers::generateV4();
|
||||
|
||||
plans.emplace_back(createLocalPlan(
|
||||
query_ast,
|
||||
stream_factory.header,
|
||||
context,
|
||||
stream_factory.processed_stage,
|
||||
shard_info.shard_num,
|
||||
/*shard_count*/1,
|
||||
0,
|
||||
all_replicas_count,
|
||||
coordinator,
|
||||
parallel_group_id));
|
||||
|
||||
if (!shard_info.hasRemoteConnections())
|
||||
{
|
||||
if (!plans.front())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "An empty plan was generated to read from local shard and there is no remote connections. This is a bug");
|
||||
query_plan = std::move(*plans.front());
|
||||
return;
|
||||
}
|
||||
|
||||
auto new_context = Context::createCopy(context);
|
||||
auto scalars = new_context->hasQueryContext() ? new_context->getQueryContext()->getScalars() : Scalars{};
|
||||
auto external_tables = new_context->getExternalTables();
|
||||
|
||||
auto read_from_remote = std::make_unique<ReadFromParallelRemoteReplicasStep>(
|
||||
query_ast,
|
||||
std::move(shard_info),
|
||||
new_cluster,
|
||||
coordinator,
|
||||
stream_factory.header,
|
||||
stream_factory.processed_stage,
|
||||
@ -326,8 +306,7 @@ void executeQueryWithParallelReplicas(
|
||||
std::move(scalars),
|
||||
std::move(external_tables),
|
||||
&Poco::Logger::get("ReadFromParallelRemoteReplicasStep"),
|
||||
query_info.storage_limits,
|
||||
parallel_group_id);
|
||||
query_info.storage_limits);
|
||||
|
||||
remote_plan->addStep(std::move(read_from_remote));
|
||||
remote_plan->addInterpreterContext(context);
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <Coordination/KeeperDispatcher.h>
|
||||
#include <Compression/ICompressionCodec.h>
|
||||
#include <Core/BackgroundSchedulePool.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <Databases/IDatabase.h>
|
||||
#include <Storages/IStorage.h>
|
||||
@ -42,7 +43,6 @@
|
||||
#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
|
||||
#include <Interpreters/TemporaryDataOnDisk.h>
|
||||
#include <Interpreters/Cache/QueryCache.h>
|
||||
#include <Core/ServerSettings.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/SettingsQuirks.h>
|
||||
#include <Access/AccessControl.h>
|
||||
@ -276,9 +276,15 @@ struct ContextSharedPart : boost::noncopyable
|
||||
|
||||
mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches
|
||||
mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends
|
||||
|
||||
mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads
|
||||
mutable ThrottlerPtr remote_write_throttler; /// A server-wide throttler for remote IO writes
|
||||
|
||||
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
|
||||
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
|
||||
|
||||
mutable ThrottlerPtr backups_server_throttler; /// A server-wide throttler for BACKUPs
|
||||
|
||||
MultiVersion<Macros> macros; /// Substitutions extracted from config.
|
||||
std::unique_ptr<DDLWorker> ddl_worker; /// Process ddl commands from zk.
|
||||
/// Rules for selecting the compression settings, depending on the size of the part.
|
||||
@ -288,6 +294,8 @@ struct ContextSharedPart : boost::noncopyable
|
||||
/// Storage policy chooser for MergeTree engines
|
||||
mutable std::shared_ptr<const StoragePolicySelector> merge_tree_storage_policy_selector;
|
||||
|
||||
ServerSettings server_settings;
|
||||
|
||||
std::optional<MergeTreeSettings> merge_tree_settings; /// Settings of MergeTree* engines.
|
||||
std::optional<MergeTreeSettings> replicated_merge_tree_settings; /// Settings of ReplicatedMergeTree* engines.
|
||||
std::atomic_size_t max_table_size_to_drop = 50000000000lu; /// Protects MergeTree tables from accidental DROP (50GB by default)
|
||||
@ -1757,6 +1765,32 @@ ContextMutablePtr Context::getBufferContext() const
|
||||
return buffer_context;
|
||||
}
|
||||
|
||||
void Context::makeQueryContext()
|
||||
{
|
||||
query_context = shared_from_this();
|
||||
|
||||
/// Create throttlers, to inherit the ThrottlePtr in the context copies.
|
||||
{
|
||||
getRemoteReadThrottler();
|
||||
getRemoteWriteThrottler();
|
||||
|
||||
getLocalReadThrottler();
|
||||
getLocalWriteThrottler();
|
||||
|
||||
getBackupsThrottler();
|
||||
}
|
||||
}
|
||||
|
||||
void Context::makeSessionContext()
|
||||
{
|
||||
session_context = shared_from_this();
|
||||
}
|
||||
|
||||
void Context::makeGlobalContext()
|
||||
{
|
||||
initGlobal();
|
||||
global_context = shared_from_this();
|
||||
}
|
||||
|
||||
const EmbeddedDictionaries & Context::getEmbeddedDictionaries() const
|
||||
{
|
||||
@ -2188,11 +2222,8 @@ BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
|
||||
auto lock = getLock();
|
||||
if (!shared->buffer_flush_schedule_pool)
|
||||
{
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(getConfigRef());
|
||||
|
||||
shared->buffer_flush_schedule_pool = std::make_unique<BackgroundSchedulePool>(
|
||||
server_settings.background_buffer_flush_schedule_pool_size,
|
||||
shared->server_settings.background_buffer_flush_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
|
||||
CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
|
||||
"BgBufSchPool");
|
||||
@ -2237,11 +2268,8 @@ BackgroundSchedulePool & Context::getSchedulePool() const
|
||||
auto lock = getLock();
|
||||
if (!shared->schedule_pool)
|
||||
{
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(getConfigRef());
|
||||
|
||||
shared->schedule_pool = std::make_unique<BackgroundSchedulePool>(
|
||||
server_settings.background_schedule_pool_size,
|
||||
shared->server_settings.background_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundSchedulePoolTask,
|
||||
CurrentMetrics::BackgroundSchedulePoolSize,
|
||||
"BgSchPool");
|
||||
@ -2255,11 +2283,8 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const
|
||||
auto lock = getLock();
|
||||
if (!shared->distributed_schedule_pool)
|
||||
{
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(getConfigRef());
|
||||
|
||||
shared->distributed_schedule_pool = std::make_unique<BackgroundSchedulePool>(
|
||||
server_settings.background_distributed_schedule_pool_size,
|
||||
shared->server_settings.background_distributed_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundDistributedSchedulePoolTask,
|
||||
CurrentMetrics::BackgroundDistributedSchedulePoolSize,
|
||||
"BgDistSchPool");
|
||||
@ -2273,11 +2298,8 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
|
||||
auto lock = getLock();
|
||||
if (!shared->message_broker_schedule_pool)
|
||||
{
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(getConfigRef());
|
||||
|
||||
shared->message_broker_schedule_pool = std::make_unique<BackgroundSchedulePool>(
|
||||
server_settings.background_message_broker_schedule_pool_size,
|
||||
shared->server_settings.background_message_broker_schedule_pool_size,
|
||||
CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
|
||||
CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
|
||||
"BgMBSchPool");
|
||||
@ -2308,22 +2330,124 @@ ThrottlerPtr Context::getReplicatedSendsThrottler() const

ThrottlerPtr Context::getRemoteReadThrottler() const
{
auto lock = getLock();
if (!shared->remote_read_throttler)
shared->remote_read_throttler = std::make_shared<Throttler>(
settings.max_remote_read_network_bandwidth_for_server);
ThrottlerPtr throttler;

return shared->remote_read_throttler;
const auto & query_settings = getSettingsRef();
UInt64 bandwidth_for_server = shared->server_settings.max_remote_read_network_bandwidth_for_server;
if (bandwidth_for_server)
{
auto lock = getLock();
if (!shared->remote_read_throttler)
shared->remote_read_throttler = std::make_shared<Throttler>(bandwidth_for_server);
throttler = shared->remote_read_throttler;
}

if (query_settings.max_remote_read_network_bandwidth)
{
auto lock = getLock();
if (!remote_read_query_throttler)
remote_read_query_throttler = std::make_shared<Throttler>(query_settings.max_remote_read_network_bandwidth, throttler);
throttler = remote_read_query_throttler;
}

return throttler;
}

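The rewritten getter above layers a per-query throttler on top of the optional server-wide one: the second Throttler constructor argument is evidently the parent, so traffic is charged against both budgets, and the same pattern repeats for the remote-write, local and backup throttlers below. The sketch that follows shows only the chaining idea with a made-up SimpleThrottler class; the real Throttler enforces a bytes-per-second limit by sleeping rather than by throwing.

```cpp
// Minimal sketch of the "query throttler chained to a server throttler" idea.
// SimpleThrottler is an illustration only: it accounts bytes against a budget
// and defers to an optional parent, unlike ClickHouse's Throttler, which
// enforces bytes-per-second by sleeping.
#include <cstdint>
#include <memory>
#include <stdexcept>

class SimpleThrottler
{
public:
    explicit SimpleThrottler(uint64_t limit_, std::shared_ptr<SimpleThrottler> parent_ = {})
        : limit(limit_), parent(std::move(parent_)) {}

    void add(uint64_t bytes)
    {
        used += bytes;
        if (limit && used > limit)
            throw std::runtime_error("limit exceeded");
        if (parent)
            parent->add(bytes);   /// the server-wide budget is charged as well
    }

private:
    uint64_t limit = 0;           /// 0 means "unlimited"
    uint64_t used = 0;
    std::shared_ptr<SimpleThrottler> parent;
};

int main()
{
    auto server = std::make_shared<SimpleThrottler>(/*limit=*/1000); /// like the *_for_server setting
    SimpleThrottler query(/*limit=*/100, server);                    /// like the per-query setting
    query.add(50);   /// charged against both the query and the server budget
}
```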
ThrottlerPtr Context::getRemoteWriteThrottler() const
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->remote_write_throttler)
|
||||
shared->remote_write_throttler = std::make_shared<Throttler>(
|
||||
settings.max_remote_write_network_bandwidth_for_server);
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
return shared->remote_write_throttler;
|
||||
const auto & query_settings = getSettingsRef();
|
||||
UInt64 bandwidth_for_server = shared->server_settings.max_remote_write_network_bandwidth_for_server;
|
||||
if (bandwidth_for_server)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->remote_write_throttler)
|
||||
shared->remote_write_throttler = std::make_shared<Throttler>(bandwidth_for_server);
|
||||
throttler = shared->remote_write_throttler;
|
||||
}
|
||||
|
||||
if (query_settings.max_remote_write_network_bandwidth)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!remote_write_query_throttler)
|
||||
remote_write_query_throttler = std::make_shared<Throttler>(query_settings.max_remote_write_network_bandwidth, throttler);
|
||||
throttler = remote_write_query_throttler;
|
||||
}
|
||||
|
||||
return throttler;
|
||||
}
|
||||
|
||||
ThrottlerPtr Context::getLocalReadThrottler() const
|
||||
{
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
if (shared->server_settings.max_local_read_bandwidth_for_server)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->local_read_throttler)
|
||||
shared->local_read_throttler = std::make_shared<Throttler>(shared->server_settings.max_local_read_bandwidth_for_server);
|
||||
throttler = shared->local_read_throttler;
|
||||
}
|
||||
|
||||
const auto & query_settings = getSettingsRef();
|
||||
if (query_settings.max_local_read_bandwidth)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!local_read_query_throttler)
|
||||
local_read_query_throttler = std::make_shared<Throttler>(query_settings.max_local_read_bandwidth, throttler);
|
||||
throttler = local_read_query_throttler;
|
||||
}
|
||||
|
||||
return throttler;
|
||||
}
|
||||
|
||||
ThrottlerPtr Context::getLocalWriteThrottler() const
|
||||
{
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
if (shared->server_settings.max_local_write_bandwidth_for_server)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->local_write_throttler)
|
||||
shared->local_write_throttler = std::make_shared<Throttler>(shared->server_settings.max_local_write_bandwidth_for_server);
|
||||
throttler = shared->local_write_throttler;
|
||||
}
|
||||
|
||||
const auto & query_settings = getSettingsRef();
|
||||
if (query_settings.max_local_write_bandwidth)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!local_write_query_throttler)
|
||||
local_write_query_throttler = std::make_shared<Throttler>(query_settings.max_local_write_bandwidth, throttler);
|
||||
throttler = local_write_query_throttler;
|
||||
}
|
||||
|
||||
return throttler;
|
||||
}
|
||||
|
||||
ThrottlerPtr Context::getBackupsThrottler() const
|
||||
{
|
||||
ThrottlerPtr throttler;
|
||||
|
||||
if (shared->server_settings.max_backup_bandwidth_for_server)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!shared->backups_server_throttler)
|
||||
shared->backups_server_throttler = std::make_shared<Throttler>(shared->server_settings.max_backup_bandwidth_for_server);
|
||||
throttler = shared->backups_server_throttler;
|
||||
}
|
||||
|
||||
const auto & query_settings = getSettingsRef();
|
||||
if (query_settings.max_backup_bandwidth)
|
||||
{
|
||||
auto lock = getLock();
|
||||
if (!backups_query_throttler)
|
||||
backups_query_throttler = std::make_shared<Throttler>(query_settings.max_backup_bandwidth, throttler);
|
||||
throttler = backups_query_throttler;
|
||||
}
|
||||
|
||||
return throttler;
|
||||
}
|
||||
|
||||
bool Context::hasDistributedDDL() const
|
||||
@ -3357,6 +3481,9 @@ void Context::setApplicationType(ApplicationType type)
|
||||
{
|
||||
/// Lock isn't required, you should set it at start
|
||||
shared->application_type = type;
|
||||
|
||||
if (type == ApplicationType::SERVER)
|
||||
shared->server_settings.loadSettingsFromConfig(Poco::Util::Application::instance().config());
|
||||
}
|
||||
|
||||
void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & config)
|
||||
@ -3813,11 +3940,7 @@ void Context::initializeBackgroundExecutorsIfNeeded()
|
||||
if (shared->are_background_executors_initialized)
|
||||
return;
|
||||
|
||||
const auto & config = getConfigRef();
|
||||
|
||||
ServerSettings server_settings;
|
||||
server_settings.loadSettingsFromConfig(config);
|
||||
|
||||
const ServerSettings & server_settings = shared->server_settings;
|
||||
size_t background_pool_size = server_settings.background_pool_size;
|
||||
auto background_merges_mutations_concurrency_ratio = server_settings.background_merges_mutations_concurrency_ratio;
|
||||
size_t background_pool_max_tasks_count = static_cast<size_t>(background_pool_size * background_merges_mutations_concurrency_ratio);
|
||||
@ -4034,6 +4157,7 @@ ReadSettings Context::getReadSettings() const
|
||||
res.priority = settings.read_priority;
|
||||
|
||||
res.remote_throttler = getRemoteReadThrottler();
|
||||
res.local_throttler = getLocalReadThrottler();
|
||||
|
||||
res.http_max_tries = settings.http_max_tries;
|
||||
res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms;
|
||||
@ -4045,6 +4169,14 @@ ReadSettings Context::getReadSettings() const
|
||||
return res;
|
||||
}
|
||||
|
||||
ReadSettings Context::getBackupReadSettings() const
|
||||
{
|
||||
ReadSettings settings = getReadSettings();
|
||||
settings.remote_throttler = getBackupsThrottler();
|
||||
settings.local_throttler = getBackupsThrottler();
|
||||
return settings;
|
||||
}
|
||||
|
||||
WriteSettings Context::getWriteSettings() const
|
||||
{
|
||||
WriteSettings res;
|
||||
@ -4056,6 +4188,7 @@ WriteSettings Context::getWriteSettings() const
|
||||
res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload;
|
||||
|
||||
res.remote_throttler = getRemoteWriteThrottler();
|
||||
res.local_throttler = getLocalWriteThrottler();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Common/OpenTelemetryTraceContext.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/Throttler_fwd.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/Settings.h>
|
||||
@ -18,8 +19,6 @@
|
||||
#include <IO/IResourceManager.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
#include <Processors/ResizeProcessor.h>
|
||||
#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>
|
||||
#include <Server/HTTP/HTTPContext.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/IStorage_fwd.h>
|
||||
@ -163,9 +162,6 @@ struct BackgroundTaskSchedulingSettings;
|
||||
class Lemmatizers;
|
||||
#endif
|
||||
|
||||
class Throttler;
|
||||
using ThrottlerPtr = std::shared_ptr<Throttler>;
|
||||
|
||||
class ZooKeeperMetadataTransaction;
|
||||
using ZooKeeperMetadataTransactionPtr = std::shared_ptr<ZooKeeperMetadataTransaction>;
|
||||
|
||||
@ -783,9 +779,9 @@ public:
|
||||
void setQueryContext(ContextMutablePtr context_) { query_context = context_; }
|
||||
void setSessionContext(ContextMutablePtr context_) { session_context = context_; }
|
||||
|
||||
void makeQueryContext() { query_context = shared_from_this(); }
|
||||
void makeSessionContext() { session_context = shared_from_this(); }
|
||||
void makeGlobalContext() { initGlobal(); global_context = shared_from_this(); }
|
||||
void makeQueryContext();
|
||||
void makeSessionContext();
|
||||
void makeGlobalContext();
|
||||
|
||||
const Settings & getSettingsRef() const { return settings; }
|
||||
|
||||
@ -916,11 +912,6 @@ public:
|
||||
BackgroundSchedulePool & getMessageBrokerSchedulePool() const;
|
||||
BackgroundSchedulePool & getDistributedSchedulePool() const;
|
||||
|
||||
ThrottlerPtr getReplicatedFetchesThrottler() const;
|
||||
ThrottlerPtr getReplicatedSendsThrottler() const;
|
||||
ThrottlerPtr getRemoteReadThrottler() const;
|
||||
ThrottlerPtr getRemoteWriteThrottler() const;
|
||||
|
||||
/// Has distributed_ddl configuration or not.
|
||||
bool hasDistributedDDL() const;
|
||||
void setDDLWorker(std::unique_ptr<DDLWorker> ddl_worker);
|
||||
@ -1116,6 +1107,9 @@ public:
|
||||
/** Get settings for reading from filesystem. */
|
||||
ReadSettings getReadSettings() const;
|
||||
|
||||
/** Get settings for reading from filesystem for BACKUPs. */
|
||||
ReadSettings getBackupReadSettings() const;
|
||||
|
||||
/** Get settings for writing to filesystem. */
|
||||
WriteSettings getWriteSettings() const;
|
||||
|
||||
@ -1152,6 +1146,28 @@ private:
|
||||
DiskSelectorPtr getDiskSelector(std::lock_guard<std::mutex> & lock) const;
|
||||
|
||||
DisksMap getDisksMap(std::lock_guard<std::mutex> & lock) const;
|
||||
|
||||
/// Throttling
|
||||
public:
|
||||
ThrottlerPtr getReplicatedFetchesThrottler() const;
|
||||
ThrottlerPtr getReplicatedSendsThrottler() const;
|
||||
|
||||
ThrottlerPtr getRemoteReadThrottler() const;
|
||||
ThrottlerPtr getRemoteWriteThrottler() const;
|
||||
|
||||
ThrottlerPtr getLocalReadThrottler() const;
|
||||
ThrottlerPtr getLocalWriteThrottler() const;
|
||||
|
||||
ThrottlerPtr getBackupsThrottler() const;
|
||||
|
||||
private:
|
||||
mutable ThrottlerPtr remote_read_query_throttler; /// A query-wide throttler for remote IO reads
|
||||
mutable ThrottlerPtr remote_write_query_throttler; /// A query-wide throttler for remote IO writes
|
||||
|
||||
mutable ThrottlerPtr local_read_query_throttler; /// A query-wide throttler for local IO reads
|
||||
mutable ThrottlerPtr local_write_query_throttler; /// A query-wide throttler for local IO writes
|
||||
|
||||
mutable ThrottlerPtr backups_query_throttler; /// A query-wide throttler for BACKUPs
|
||||
};
|
||||
|
||||
struct HTTPContext : public IHTTPContext
|
||||
|
@ -462,6 +462,20 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
context->setSetting("parallel_replicas_custom_key", String{""});
|
||||
}
|
||||
|
||||
/// Try to execute query without parallel replicas if we find that there is a FINAL modifier there.
|
||||
bool is_query_with_final = false;
|
||||
if (query_info.table_expression_modifiers)
|
||||
is_query_with_final = query_info.table_expression_modifiers->hasFinal();
|
||||
else if (query_info.query)
|
||||
is_query_with_final = query_info.query->as<ASTSelectQuery &>().final();
|
||||
|
||||
if (is_query_with_final && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas))
|
||||
{
|
||||
LOG_WARNING(log, "FINAL modifier is supported with parallel replicas. Will try to execute the query without using them.");
|
||||
context->setSetting("allow_experimental_parallel_reading_from_replicas", false);
|
||||
context->setSetting("parallel_replicas_custom_key", String{""});
|
||||
}
|
||||
|
||||
/// Rewrite JOINs
|
||||
if (!has_input && joined_tables.tablesCount() > 1)
|
||||
{
|
||||
|
@ -107,7 +107,7 @@ public:
|
||||
if (it == sessions.end())
|
||||
{
|
||||
if (throw_if_not_found)
|
||||
throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session not found.");
|
||||
throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session {} not found", session_id);
|
||||
|
||||
/// Create a new session from current context.
|
||||
auto context = Context::createCopy(global_context);
|
||||
@ -129,7 +129,7 @@ public:
|
||||
LOG_TEST(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first);
|
||||
|
||||
if (!session.unique())
|
||||
throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session is locked by a concurrent client.");
|
||||
throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session {} is locked by a concurrent client", session_id);
|
||||
return {session, false};
|
||||
}
|
||||
}
|
||||
|
@ -122,13 +122,8 @@ bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
|
||||
}
|
||||
processors->insert(processors->end(), new_processors.begin(), new_processors.end());
|
||||
|
||||
source_processors.reserve(source_processors.size() + new_processors.size());
|
||||
|
||||
for (auto & proc: new_processors)
|
||||
{
|
||||
bool is_source = proc->getInputs().empty();
|
||||
source_processors.emplace_back(is_source);
|
||||
}
|
||||
// Do not consider sources added during pipeline expansion as cancelable to avoid tricky corner cases (e.g. ConvertingAggregatedToChunksWithMergingSource cancellation)
|
||||
source_processors.resize(source_processors.size() + new_processors.size(), false);
|
||||
}
|
||||
|
||||
uint64_t num_processors = processors->size();
|
||||
|
@ -446,11 +446,6 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da
|
||||
|
||||
const auto * data_type_map = assert_cast<const DataTypeMap *>(data_type.get());
|
||||
const auto & key_data_type = data_type_map->getKeyType();
|
||||
if (!isStringOrFixedString(key_data_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Only maps with String key type are supported in BSON, got key type: {}",
|
||||
key_data_type->getName());
|
||||
|
||||
const auto & value_data_type = data_type_map->getValueType();
|
||||
auto & column_map = assert_cast<ColumnMap &>(column);
|
||||
auto & key_column = column_map.getNestedData().getColumn(0);
|
||||
@ -464,7 +459,8 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da
|
||||
{
|
||||
auto nested_bson_type = getBSONType(readBSONType(*in));
|
||||
auto name = readBSONKeyName(*in, current_key_name);
|
||||
key_column.insertData(name.data, name.size);
|
||||
ReadBufferFromMemory buf(name.data, name.size);
|
||||
key_data_type->getDefaultSerialization()->deserializeWholeText(key_column, buf, format_settings);
|
||||
readField(value_column, value_data_type, nested_bson_type);
|
||||
}
|
||||
|
||||
@ -511,6 +507,7 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr &
|
||||
lc_column.insertFromFullColumn(*tmp_column, 0);
|
||||
return res;
|
||||
}
|
||||
case TypeIndex::Enum8: [[fallthrough]];
|
||||
case TypeIndex::Int8:
|
||||
{
|
||||
readAndInsertInteger<Int8>(*in, column, data_type, bson_type);
|
||||
@ -521,6 +518,7 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr &
|
||||
readAndInsertInteger<UInt8>(*in, column, data_type, bson_type);
|
||||
return true;
|
||||
}
|
||||
case TypeIndex::Enum16: [[fallthrough]];
|
||||
case TypeIndex::Int16:
|
||||
{
|
||||
readAndInsertInteger<Int16>(*in, column, data_type, bson_type);
|
||||
@ -1008,6 +1006,9 @@ fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t
|
||||
"the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely BSON is malformed",
|
||||
min_bytes, document_size);
|
||||
|
||||
if (document_size < sizeof(document_size))
|
||||
throw ParsingException(ErrorCodes::INCORRECT_DATA, "Size of BSON document is invalid");
|
||||
|
||||
size_t old_size = memory.size();
|
||||
memory.resize(old_size + document_size);
|
||||
unalignedStore<BSONSizeT>(memory.data() + old_size, document_size);
|
||||
|
@ -33,13 +33,14 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
/// In BSON all names should be valid UTF8 sequences
|
||||
static String toValidUTF8String(const String & name)
|
||||
static String toValidUTF8String(const String & name, const FormatSettings & settings)
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
WriteBufferValidUTF8 validating_buf(buf);
|
||||
writeString(name, validating_buf);
|
||||
writeJSONString(name, validating_buf, settings);
|
||||
validating_buf.finalize();
|
||||
return buf.str();
|
||||
/// Return value without quotes
|
||||
return buf.str().substr(1, buf.str().size() - 2);
|
||||
}
|
||||
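The rewritten helper above obtains a safe key by serializing the name as a quoted, escaped JSON string and then stripping the surrounding quotes. A rough self-contained sketch of that trick follows; it hand-rolls only minimal JSON-style escaping and is not the real helper, which delegates to writeJSONString and honors FormatSettings.

```cpp
#include <cstdio>
#include <iostream>
#include <string>

/// Sketch only: escape a key the way a JSON writer would, then drop the outer quotes,
/// so the result can be used directly as a BSON document key.
std::string escapeLikeJSONKey(const std::string & name)
{
    std::string quoted = "\"";
    for (unsigned char c : name)
    {
        switch (c)
        {
            case '"': quoted += "\\\""; break;
            case '\\': quoted += "\\\\"; break;
            default:
                if (c < 0x20)
                {
                    char buf[8];
                    std::snprintf(buf, sizeof(buf), "\\u%04x", static_cast<unsigned>(c));
                    quoted += buf; /// control characters become \u00XX escapes
                }
                else
                    quoted += static_cast<char>(c);
        }
    }
    quoted += '"';
    /// Return the value without the quotes, as the format does.
    return quoted.substr(1, quoted.size() - 2);
}

int main()
{
    std::cout << escapeLikeJSONKey("a\"b\n") << "\n"; /// prints: a\"b\u000a
}
```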
|
||||
BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat(
|
||||
@ -49,7 +50,7 @@ BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat(
|
||||
const auto & sample = getPort(PortKind::Main).getHeader();
|
||||
fields.reserve(sample.columns());
|
||||
for (const auto & field : sample.getNamesAndTypes())
|
||||
fields.emplace_back(toValidUTF8String(field.name), field.type);
|
||||
fields.emplace_back(toValidUTF8String(field.name, settings), field.type);
|
||||
}
|
||||
|
||||
static void writeBSONSize(size_t size, WriteBuffer & buf)
|
||||
@ -112,7 +113,7 @@ static void writeBSONBigInteger(const IColumn & column, size_t row_num, const St
|
||||
buf.write(data.data, data.size);
|
||||
}
|
||||
|
||||
size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name)
|
||||
size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name, const String & path, std::unordered_map<String, size_t> & nested_document_sizes)
|
||||
{
|
||||
size_t size = 1; // Field type
|
||||
size += name.size() + 1; // Field name and \0
|
||||
@ -125,6 +126,8 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
|
||||
case TypeIndex::Date32: [[fallthrough]];
|
||||
case TypeIndex::Decimal32: [[fallthrough]];
|
||||
case TypeIndex::IPv4: [[fallthrough]];
|
||||
case TypeIndex::Enum8: [[fallthrough]];
|
||||
case TypeIndex::Enum16: [[fallthrough]];
|
||||
case TypeIndex::Int32:
|
||||
{
|
||||
return size + sizeof(Int32);
|
||||
@ -183,7 +186,7 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
|
||||
auto dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
|
||||
auto dict_column = lc_column.getDictionary().getNestedColumn();
|
||||
size_t index = lc_column.getIndexAt(row_num);
|
||||
return countBSONFieldSize(*dict_column, dict_type, index, name);
|
||||
return countBSONFieldSize(*dict_column, dict_type, index, name, path, nested_document_sizes);
|
||||
}
|
||||
case TypeIndex::Nullable:
|
||||
{
|
||||
@ -191,11 +194,11 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
|
||||
const ColumnNullable & column_nullable = assert_cast<const ColumnNullable &>(column);
|
||||
if (column_nullable.isNullAt(row_num))
|
||||
return size; /// Null has no value, just type
|
||||
return countBSONFieldSize(column_nullable.getNestedColumn(), nested_type, row_num, name);
|
||||
return countBSONFieldSize(column_nullable.getNestedColumn(), nested_type, row_num, name, path, nested_document_sizes);
|
||||
}
|
||||
case TypeIndex::Array:
|
||||
{
|
||||
size += sizeof(BSONSizeT); // Size of a document
|
||||
size_t document_size = sizeof(BSONSizeT); // Size of a document
|
||||
|
||||
const auto & nested_type = assert_cast<const DataTypeArray *>(data_type.get())->getNestedType();
|
||||
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
|
||||
@ -204,39 +207,41 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
|
||||
size_t offset = offsets[row_num - 1];
|
||||
size_t array_size = offsets[row_num] - offset;
|
||||
|
||||
String current_path = path + "." + name;
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array
|
||||
document_size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i), current_path, nested_document_sizes); // Add size of each value from array
|
||||
|
||||
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
nested_document_sizes[current_path] = document_size;
|
||||
return size + document_size;
|
||||
}
|
||||
case TypeIndex::Tuple:
|
||||
{
|
||||
size += sizeof(BSONSizeT); // Size of a document
|
||||
size_t document_size = sizeof(BSONSizeT); // Size of a document
|
||||
|
||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(data_type.get());
|
||||
const auto & nested_types = tuple_type->getElements();
|
||||
bool have_explicit_names = tuple_type->haveExplicitNames();
|
||||
const auto & nested_names = tuple_type->getElementNames();
|
||||
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
|
||||
const auto & nested_columns = tuple_column.getColumns();
|
||||
|
||||
String current_path = path + "." + name;
|
||||
for (size_t i = 0; i < nested_columns.size(); ++i)
|
||||
{
|
||||
String key_name = have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i);
|
||||
size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple
|
||||
String key_name = toValidUTF8String(nested_names[i], settings);
|
||||
document_size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name, current_path, nested_document_sizes); // Add size of each value from tuple
|
||||
}
|
||||
|
||||
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
nested_document_sizes[current_path] = document_size;
|
||||
return size + document_size;
|
||||
}
|
||||
case TypeIndex::Map:
|
||||
{
|
||||
size += sizeof(BSONSizeT); // Size of a document
|
||||
size_t document_size = sizeof(BSONSizeT); // Size of a document
|
||||
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||
if (!isStringOrFixedString(map_type.getKeyType()))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Only maps with String key type are supported in BSON, got key type: {}",
|
||||
map_type.getKeyType()->getName());
|
||||
const auto & key_type = map_type.getKeyType();
|
||||
const auto & value_type = map_type.getValueType();
|
||||
|
||||
const auto & map_column = assert_cast<const ColumnMap &>(column);
|
||||
@ -248,20 +253,26 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co
|
||||
size_t offset = offsets[row_num - 1];
|
||||
size_t map_size = offsets[row_num] - offset;
|
||||
|
||||
WriteBufferFromOwnString buf;
|
||||
String current_path = path + "." + name;
|
||||
for (size_t i = 0; i < map_size; ++i)
|
||||
{
|
||||
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
|
||||
size += countBSONFieldSize(*value_column, value_type, offset + i, key);
|
||||
key_type->getDefaultSerialization()->serializeText(*key_column, offset + i, buf, settings);
|
||||
auto s = countBSONFieldSize(*value_column, value_type, offset + i, toValidUTF8String(buf.str(), settings), current_path, nested_document_sizes);
|
||||
document_size += s;
|
||||
buf.restart();
|
||||
}
|
||||
|
||||
return size + sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
nested_document_sizes[current_path] = document_size;
|
||||
return size + document_size;
|
||||
}
|
||||
default:
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported in BSON output format", data_type->getName());
|
||||
}
|
||||
}
|
||||
|
||||
void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name)
|
||||
void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name, const String & path, std::unordered_map<String, size_t> & nested_document_sizes)
|
||||
{
|
||||
switch (data_type->getTypeId())
|
||||
{
|
||||
@ -275,6 +286,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
writeBSONNumber<ColumnFloat64, double>(BSONType::DOUBLE, column, row_num, name, out);
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Enum8: [[fallthrough]];
|
||||
case TypeIndex::Int8:
|
||||
{
|
||||
writeBSONNumber<ColumnInt8, Int32>(BSONType::INT32, column, row_num, name, out);
|
||||
@ -288,6 +300,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
writeBSONNumber<ColumnUInt8, Int32>(BSONType::INT32, column, row_num, name, out);
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Enum16: [[fallthrough]];
|
||||
case TypeIndex::Int16:
|
||||
{
|
||||
writeBSONNumber<ColumnInt16, Int32>(BSONType::INT32, column, row_num, name, out);
|
||||
@ -403,7 +416,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
auto dict_type = assert_cast<const DataTypeLowCardinality *>(data_type.get())->getDictionaryType();
|
||||
auto dict_column = lc_column.getDictionary().getNestedColumn();
|
||||
size_t index = lc_column.getIndexAt(row_num);
|
||||
serializeField(*dict_column, dict_type, index, name);
|
||||
serializeField(*dict_column, dict_type, index, name, path, nested_document_sizes);
|
||||
break;
|
||||
}
|
||||
case TypeIndex::Nullable:
|
||||
@ -411,7 +424,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
auto nested_type = removeNullable(data_type);
|
||||
const ColumnNullable & column_nullable = assert_cast<const ColumnNullable &>(column);
|
||||
if (!column_nullable.isNullAt(row_num))
|
||||
serializeField(column_nullable.getNestedColumn(), nested_type, row_num, name);
|
||||
serializeField(column_nullable.getNestedColumn(), nested_type, row_num, name, path, nested_document_sizes);
|
||||
else
|
||||
writeBSONTypeAndKeyName(BSONType::NULL_VALUE, name, out);
|
||||
break;
|
||||
@ -427,15 +440,12 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
|
||||
writeBSONTypeAndKeyName(BSONType::ARRAY, name, out);
|
||||
|
||||
size_t document_size = sizeof(BSONSizeT);
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
document_size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array
|
||||
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
|
||||
String current_path = path + "." + name;
|
||||
size_t document_size = nested_document_sizes[current_path];
|
||||
writeBSONSize(document_size, out);
|
||||
|
||||
for (size_t i = 0; i < array_size; ++i)
|
||||
serializeField(nested_column, nested_type, offset + i, std::to_string(i));
|
||||
serializeField(nested_column, nested_type, offset + i, std::to_string(i), current_path, nested_document_sizes);
|
||||
|
||||
writeChar(BSON_DOCUMENT_END, out);
|
||||
break;
|
||||
@ -444,26 +454,19 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
{
|
||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(data_type.get());
|
||||
const auto & nested_types = tuple_type->getElements();
|
||||
bool have_explicit_names = tuple_type->haveExplicitNames();
|
||||
const auto & nested_names = tuple_type->getElementNames();
|
||||
const auto & tuple_column = assert_cast<const ColumnTuple &>(column);
|
||||
const auto & nested_columns = tuple_column.getColumns();
|
||||
|
||||
BSONType bson_type = have_explicit_names ? BSONType::DOCUMENT : BSONType::ARRAY;
|
||||
BSONType bson_type = tuple_type->haveExplicitNames() ? BSONType::DOCUMENT : BSONType::ARRAY;
|
||||
writeBSONTypeAndKeyName(bson_type, name, out);
|
||||
|
||||
size_t document_size = sizeof(BSONSizeT);
|
||||
for (size_t i = 0; i < nested_columns.size(); ++i)
|
||||
{
|
||||
String key_name = have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i);
|
||||
document_size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple
|
||||
}
|
||||
document_size += sizeof(BSON_DOCUMENT_END); // Add final \0
|
||||
|
||||
String current_path = path + "." + name;
|
||||
size_t document_size = nested_document_sizes[current_path];
|
||||
writeBSONSize(document_size, out);
|
||||
|
||||
for (size_t i = 0; i < nested_columns.size(); ++i)
|
||||
serializeField(*nested_columns[i], nested_types[i], row_num, have_explicit_names ? toValidUTF8String(nested_names[i]) : std::to_string(i));
|
||||
serializeField(*nested_columns[i], nested_types[i], row_num, toValidUTF8String(nested_names[i], settings), current_path, nested_document_sizes);
|
||||
|
||||
writeChar(BSON_DOCUMENT_END, out);
|
||||
break;
|
||||
@ -471,10 +474,7 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
case TypeIndex::Map:
|
||||
{
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||
if (!isStringOrFixedString(map_type.getKeyType()))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Only maps with String key type are supported in BSON, got key type: {}",
|
||||
map_type.getKeyType()->getName());
|
||||
const auto & key_type = map_type.getKeyType();
|
||||
const auto & value_type = map_type.getValueType();
|
||||
|
||||
const auto & map_column = assert_cast<const ColumnMap &>(column);
|
||||
@ -488,20 +488,16 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da
|
||||
|
||||
writeBSONTypeAndKeyName(BSONType::DOCUMENT, name, out);
|
||||
|
||||
size_t document_size = sizeof(BSONSizeT);
|
||||
for (size_t i = 0; i < map_size; ++i)
|
||||
{
|
||||
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
|
||||
document_size += countBSONFieldSize(*value_column, value_type, offset + i, key);
|
||||
}
|
||||
document_size += sizeof(BSON_DOCUMENT_END);
|
||||
|
||||
String current_path = path + "." + name;
|
||||
size_t document_size = nested_document_sizes[current_path];
|
||||
writeBSONSize(document_size, out);
|
||||
|
||||
WriteBufferFromOwnString buf;
|
||||
for (size_t i = 0; i < map_size; ++i)
|
||||
{
|
||||
String key = toValidUTF8String(key_column->getDataAt(offset + i).toString());
|
||||
serializeField(*value_column, value_type, offset + i, key);
|
||||
key_type->getDefaultSerialization()->serializeText(*key_column, offset + i, buf, settings);
|
||||
serializeField(*value_column, value_type, offset + i, toValidUTF8String(buf.str(), settings), current_path, nested_document_sizes);
|
||||
buf.restart();
|
||||
}
|
||||
|
||||
writeChar(BSON_DOCUMENT_END, out);
|
||||
@ -516,15 +512,18 @@ void BSONEachRowRowOutputFormat::write(const Columns & columns, size_t row_num)
|
||||
{
|
||||
/// We should calculate and write document size before its content
|
||||
size_t document_size = sizeof(BSONSizeT);
|
||||
/// Remember calculated sizes for nested documents (map document path -> size), so we won't need
|
||||
/// to recalculate them while serializing.
|
||||
std::unordered_map<String, size_t> nested_document_sizes;
|
||||
for (size_t i = 0; i != columns.size(); ++i)
|
||||
document_size += countBSONFieldSize(*columns[i], fields[i].type, row_num, fields[i].name);
|
||||
document_size += countBSONFieldSize(*columns[i], fields[i].type, row_num, fields[i].name, "$", nested_document_sizes);
|
||||
document_size += sizeof(BSON_DOCUMENT_END);
|
||||
|
||||
size_t document_start = out.count();
|
||||
writeBSONSize(document_size, out);
|
||||
|
||||
for (size_t i = 0; i != columns.size(); ++i)
|
||||
serializeField(*columns[i], fields[i].type, row_num, fields[i].name);
|
||||
serializeField(*columns[i], fields[i].type, row_num, fields[i].name, "$", nested_document_sizes);
|
||||
|
||||
writeChar(BSON_DOCUMENT_END, out);
|
||||
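The write() method above makes one sizing pass that memoizes every nested document size under its path (rooted at "$") and a second pass that serializes using those cached sizes, so nothing is measured twice. The following self-contained sketch shows the same two-pass idea on a toy recursive structure; the Node type and the 0x03/0x10 markers are illustrative and this is not the actual BSONEachRow writer.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

/// Sketch only: pass 1 computes sizes and caches nested document sizes by path,
/// pass 2 writes each size prefix from the cache before the corresponding content.
struct Node
{
    int32_t leaf = 0;                                    /// value when this is a scalar field
    std::vector<std::pair<std::string, Node>> children;  /// non-empty => nested document
    bool isDocument() const { return !children.empty(); }
};

using SizeCache = std::unordered_map<std::string, size_t>;

size_t countFieldSize(const Node & node, const std::string & name, const std::string & path, SizeCache & cache)
{
    size_t size = 1 + name.size() + 1;              /// type byte + key + '\0'
    if (!node.isDocument())
        return size + sizeof(int32_t);

    size_t document_size = sizeof(int32_t);         /// the document's own size prefix
    const std::string current_path = path + "." + name;
    for (const auto & [key, child] : node.children)
        document_size += countFieldSize(child, key, current_path, cache);
    document_size += 1;                             /// trailing '\0'
    cache[current_path] = document_size;            /// remember it for the write pass
    return size + document_size;
}

void writeField(const Node & node, const std::string & name, const std::string & path,
                const SizeCache & cache, std::vector<uint8_t> & out)
{
    out.push_back(node.isDocument() ? 0x03 : 0x10); /// BSON-style type byte
    out.insert(out.end(), name.begin(), name.end());
    out.push_back(0);

    if (!node.isDocument())
    {
        for (int i = 0; i < 4; ++i)
            out.push_back(static_cast<uint8_t>(node.leaf >> (8 * i)));
        return;
    }

    const std::string current_path = path + "." + name;
    const auto document_size = static_cast<uint32_t>(cache.at(current_path)); /// no recomputation here
    for (int i = 0; i < 4; ++i)
        out.push_back(static_cast<uint8_t>(document_size >> (8 * i)));
    for (const auto & [key, child] : node.children)
        writeField(child, key, current_path, cache, out);
    out.push_back(0);
}

int main()
{
    Node row;
    row.children = {{"a", Node{1, {}}}, {"nested", Node{0, {{"b", Node{2, {}}}}}}};

    SizeCache cache;
    size_t document_size = sizeof(int32_t) + 1;     /// root size prefix + trailing '\0'
    for (const auto & [key, child] : row.children)
        document_size += countFieldSize(child, key, "$", cache);

    std::vector<uint8_t> out;
    for (int i = 0; i < 4; ++i)
        out.push_back(static_cast<uint8_t>(document_size >> (8 * i)));
    for (const auto & [key, child] : row.children)
        writeField(child, key, "$", cache, out);
    out.push_back(0);

    std::cout << "serialized " << out.size() << " bytes, precomputed " << document_size << "\n";
}
```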
|
||||
|
@ -17,8 +17,8 @@ namespace DB
|
||||
*
|
||||
* ClickHouse type | BSON Type
|
||||
* Bool | \x08 boolean
|
||||
* Int8/UInt8 | \x10 int32
|
||||
* Int16UInt16 | \x10 int32
|
||||
* Int8/UInt8/Enum8 | \x10 int32
|
||||
* Int16/UInt16/Enum16 | \x10 int32
|
||||
* Int32 | \x10 int32
|
||||
* UInt32 | \x12 int64
|
||||
* Int64 | \x12 int64
|
||||
@ -38,7 +38,7 @@ namespace DB
|
||||
* Array | \x04 array
|
||||
* Tuple | \x04 array
|
||||
* Named Tuple | \x03 document
|
||||
* Map (with String keys) | \x03 document
|
||||
* Map | \x03 document
|
||||
*
|
||||
* Note: on Big-Endian platforms this format will not work properly.
|
||||
*/
|
||||
@ -55,12 +55,24 @@ private:
|
||||
void write(const Columns & columns, size_t row_num) override;
|
||||
void writeField(const IColumn &, const ISerialization &, size_t) override { }
|
||||
|
||||
void serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name);
|
||||
void serializeField(
|
||||
const IColumn & column,
|
||||
const DataTypePtr & data_type,
|
||||
size_t row_num,
|
||||
const String & name,
|
||||
const String & path,
|
||||
std::unordered_map<String, size_t> & nested_document_sizes);
|
||||
|
||||
/// Count field size in bytes that we will get after serialization in BSON format.
|
||||
/// It's needed to calculate document size before actual serialization,
|
||||
/// because in BSON format we should write the size of the document before its content.
|
||||
size_t countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name);
|
||||
size_t countBSONFieldSize(
|
||||
const IColumn & column,
|
||||
const DataTypePtr & data_type,
|
||||
size_t row_num,
|
||||
const String & name,
|
||||
const String & path,
|
||||
std::unordered_map<String, size_t> & nested_document_sizes);
|
||||
|
||||
NamesAndTypes fields;
|
||||
FormatSettings settings;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -23,6 +24,7 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -80,22 +82,39 @@ kj::Array<capnp::word> CapnProtoRowInputFormat::readMessage()
|
||||
return msg;
|
||||
}
|
||||
|
||||
static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value)
|
||||
static void insertInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value)
|
||||
{
|
||||
switch (column_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::Int8:
|
||||
assert_cast<ColumnInt8 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::UInt8:
|
||||
assert_cast<ColumnUInt8 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::Int16:
|
||||
assert_cast<ColumnInt16 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::Date: [[fallthrough]];
|
||||
case TypeIndex::UInt16:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::Int32:
|
||||
assert_cast<ColumnInt32 &>(column).insertValue(static_cast<Int32>(value));
|
||||
break;
|
||||
case TypeIndex::DateTime: [[fallthrough]];
|
||||
case TypeIndex::UInt32:
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value));
|
||||
break;
|
||||
case TypeIndex::IPv4:
|
||||
assert_cast<ColumnIPv4 &>(column).insertValue(IPv4(static_cast<UInt32>(value)));
|
||||
break;
|
||||
case TypeIndex::Int64:
|
||||
assert_cast<ColumnInt64 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::UInt64:
|
||||
assert_cast<ColumnUInt64 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::DateTime64:
|
||||
assert_cast<ColumnDecimal<DateTime64> &>(column).insertValue(value);
|
||||
break;
|
||||
@ -106,33 +125,7 @@ static void insertSignedInteger(IColumn & column, const DataTypePtr & column_typ
|
||||
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(value);
|
||||
break;
|
||||
default:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer.");
|
||||
}
|
||||
}
|
||||
|
||||
static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value)
|
||||
{
|
||||
switch (column_type->getTypeId())
|
||||
{
|
||||
case TypeIndex::UInt8:
|
||||
assert_cast<ColumnUInt8 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::Date: [[fallthrough]];
|
||||
case TypeIndex::UInt16:
|
||||
assert_cast<ColumnUInt16 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::DateTime: [[fallthrough]];
|
||||
case TypeIndex::UInt32:
|
||||
assert_cast<ColumnUInt32 &>(column).insertValue(static_cast<UInt32>(value));
|
||||
break;
|
||||
case TypeIndex::UInt64:
|
||||
assert_cast<ColumnUInt64 &>(column).insertValue(value);
|
||||
break;
|
||||
case TypeIndex::IPv4:
|
||||
assert_cast<ColumnIPv4 &>(column).insertValue(IPv4(static_cast<UInt32>(value)));
|
||||
break;
|
||||
default:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer.");
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type {} cannot be parsed from integer", column_type->getName());
|
||||
}
|
||||
}
|
||||
|
||||
@ -152,8 +145,11 @@ static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float
|
||||
}
|
||||
|
||||
template <typename Value>
|
||||
static void insertString(IColumn & column, Value value)
|
||||
static void insertData(IColumn & column, const DataTypePtr & column_type, Value value)
|
||||
{
|
||||
if (column_type->haveMaximumSizeOfValue() && value.size() != column_type->getSizeOfValueInMemory())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected size of {} value: {}", column_type->getName(), value.size());
|
||||
|
||||
column.insertData(reinterpret_cast<const char *>(value.begin()), value.size());
|
||||
}
|
||||
|
||||
@ -166,10 +162,10 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const
|
||||
switch (enum_comparing_mode)
|
||||
{
|
||||
case FormatSettings::EnumComparingMode::BY_VALUES:
|
||||
insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal()));
|
||||
insertInteger(column, nested_type, Int64(enumerant.getOrdinal()));
|
||||
return;
|
||||
case FormatSettings::EnumComparingMode::BY_NAMES:
|
||||
insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName()))));
|
||||
insertInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName()))));
|
||||
return;
|
||||
case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
|
||||
{
|
||||
@ -179,7 +175,7 @@ static void insertEnum(IColumn & column, const DataTypePtr & column_type, const
|
||||
{
|
||||
if (compareEnumNames(name, enum_name, enum_comparing_mode))
|
||||
{
|
||||
insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name)));
|
||||
insertInteger(column, nested_type, Int64(enum_type->getValue(name)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -202,22 +198,22 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const
|
||||
switch (value.getType())
|
||||
{
|
||||
case capnp::DynamicValue::Type::INT:
|
||||
insertSignedInteger(column, column_type, value.as<Int64>());
|
||||
insertInteger(column, column_type, value.as<Int64>());
|
||||
break;
|
||||
case capnp::DynamicValue::Type::UINT:
|
||||
insertUnsignedInteger(column, column_type, value.as<UInt64>());
|
||||
insertInteger(column, column_type, value.as<UInt64>());
|
||||
break;
|
||||
case capnp::DynamicValue::Type::FLOAT:
|
||||
insertFloat(column, column_type, value.as<Float64>());
|
||||
break;
|
||||
case capnp::DynamicValue::Type::BOOL:
|
||||
insertUnsignedInteger(column, column_type, UInt64(value.as<bool>()));
|
||||
insertInteger(column, column_type, UInt64(value.as<bool>()));
|
||||
break;
|
||||
case capnp::DynamicValue::Type::DATA:
|
||||
insertString(column, value.as<capnp::Data>());
|
||||
insertData(column, column_type, value.as<capnp::Data>());
|
||||
break;
|
||||
case capnp::DynamicValue::Type::TEXT:
|
||||
insertString(column, value.as<capnp::Text>());
|
||||
insertData(column, column_type, value.as<capnp::Text>());
|
||||
break;
|
||||
case capnp::DynamicValue::Type::ENUM:
|
||||
if (column_type->getTypeId() == TypeIndex::Enum8)
|
||||
@ -260,14 +256,26 @@ static void insertValue(IColumn & column, const DataTypePtr & column_type, const
|
||||
{
|
||||
auto & tuple_column = assert_cast<ColumnTuple &>(column);
|
||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(column_type.get());
|
||||
for (size_t i = 0; i != tuple_column.tupleSize(); ++i)
|
||||
bool have_explicit_names = tuple_type->haveExplicitNames();
|
||||
auto struct_schema = struct_value.getSchema();
|
||||
for (uint32_t i = 0; i != tuple_column.tupleSize(); ++i)
|
||||
insertValue(
|
||||
tuple_column.getColumn(i),
|
||||
tuple_type->getElements()[i],
|
||||
tuple_type->getElementNames()[i],
|
||||
struct_value.get(tuple_type->getElementNames()[i]),
|
||||
struct_value.get(have_explicit_names ? struct_schema.getFieldByName(tuple_type->getElementNames()[i]) : struct_schema.getFields()[i]),
|
||||
enum_comparing_mode);
|
||||
}
|
||||
else if (isMap(column_type))
|
||||
{
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*column_type);
|
||||
DataTypes key_value_types = {map_type.getKeyType(), map_type.getValueType()};
|
||||
Names key_value_names = {"key", "value"};
|
||||
auto entries_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(key_value_types, key_value_names));
|
||||
auto & entries_column = assert_cast<ColumnMap &>(column).getNestedColumn();
|
||||
auto entries_field = struct_value.getSchema().getFields()[0];
|
||||
insertValue(entries_column, entries_type, column_name, struct_value.get(entries_field), enum_comparing_mode);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// It can be nested column from Nested type.
|
||||
|
@ -14,12 +14,14 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnMap.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -177,18 +179,46 @@ static std::optional<capnp::DynamicValue::Reader> convertToDynamicValue(
|
||||
else if (isTuple(data_type))
|
||||
{
|
||||
const auto * tuple_data_type = assert_cast<const DataTypeTuple *>(data_type.get());
|
||||
auto nested_types = tuple_data_type->getElements();
|
||||
const auto & nested_types = tuple_data_type->getElements();
|
||||
const auto & nested_names = tuple_data_type->getElementNames();
|
||||
const auto & nested_columns = assert_cast<const ColumnTuple *>(column.get())->getColumns();
|
||||
for (const auto & name : tuple_data_type->getElementNames())
|
||||
bool have_explicit_names = tuple_data_type->haveExplicitNames();
|
||||
for (uint32_t i = 0; i != nested_names.size(); ++i)
|
||||
{
|
||||
auto pos = tuple_data_type->getPositionByName(name);
|
||||
auto field_builder
|
||||
= initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name));
|
||||
auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||
capnp::StructSchema::Field nested_field = have_explicit_names ? nested_struct_schema.getFieldByName(nested_names[i]) : nested_struct_schema.getFields()[i];
|
||||
auto field_builder = initStructFieldBuilder(nested_columns[i], row_num, struct_builder, nested_field);
|
||||
auto value = convertToDynamicValue(nested_columns[i], nested_types[i], row_num, nested_names[i], field_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||
if (value)
|
||||
struct_builder.set(name, *value);
|
||||
struct_builder.set(nested_field, *value);
|
||||
}
|
||||
}
|
||||
else if (isMap(data_type))
|
||||
{
|
||||
/// We output the Map type using the following CapnProto schema:
|
||||
///
|
||||
/// struct Map {
|
||||
/// struct Entry {
|
||||
/// key @0: Key;
|
||||
/// value @1: Value;
|
||||
/// }
|
||||
/// entries @0 :List(Entry);
|
||||
/// }
|
||||
///
|
||||
/// And we don't need to check that the struct has this form here because we checked it before.
|
||||
const auto & map_type = assert_cast<const DataTypeMap &>(*data_type);
|
||||
DataTypes key_value_types = {map_type.getKeyType(), map_type.getValueType()};
|
||||
Names key_value_names = {"key", "value"};
|
||||
auto entries_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(key_value_types, key_value_names));
|
||||
|
||||
/// Nested column in Map is actually Array(Tuple), so we can output it according to "entries" field schema.
|
||||
const auto & entries_column = assert_cast<const ColumnMap *>(column.get())->getNestedColumnPtr();
|
||||
|
||||
auto entries_field = nested_struct_schema.getFields()[0];
|
||||
auto field_builder = initStructFieldBuilder(entries_column, row_num, struct_builder, entries_field);
|
||||
auto entries_value = convertToDynamicValue(entries_column, entries_type, row_num, column_name, field_builder, enum_comparing_mode, temporary_text_data_storage);
|
||||
if (entries_value)
|
||||
struct_builder.set(entries_field, *entries_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// It can be nested column from Nested type.
|
||||
|
@ -201,13 +201,6 @@ QueryPipelineBuilderPtr QueryPlan::buildQueryPipeline(
|
||||
last_pipeline->setProcessListElement(build_pipeline_settings.process_list_element);
|
||||
last_pipeline->addResources(std::move(resources));
|
||||
|
||||
/// This is related to parallel replicas.
|
||||
/// Not to let the remote sources starve for CPU we create an
|
||||
/// explicit dependency between processors which read from local replica
|
||||
/// and ones that receive data from remote replicas and constantly answer
|
||||
/// to coordination packets.
|
||||
last_pipeline->connectDependencies();
|
||||
|
||||
return last_pipeline;
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <Processors/Sources/NullSource.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/FilterTransform.h>
|
||||
#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>
|
||||
#include <Processors/Transforms/ReverseTransform.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
|
||||
@ -100,6 +99,7 @@ namespace ErrorCodes
|
||||
extern const int INDEX_NOT_USED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int TOO_MANY_ROWS;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
static MergeTreeReaderSettings getMergeTreeReaderSettings(
|
||||
@ -295,9 +295,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
|
||||
/// We have a special logic for local replica. It has to read less data, because in some cases it should
|
||||
/// merge states of aggregate functions or do some other important stuff other than reading from Disk.
|
||||
auto is_local_replica = context->getClientInfo().interface == ClientInfo::Interface::LOCAL;
|
||||
if (!is_local_replica)
|
||||
min_marks_for_concurrent_read = static_cast<size_t>(min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier);
|
||||
min_marks_for_concurrent_read = static_cast<size_t>(min_marks_for_concurrent_read * context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier);
|
||||
|
||||
auto pool = std::make_shared<MergeTreeReadPoolParallelReplicas>(
|
||||
storage_snapshot,
|
||||
@ -334,16 +332,6 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
source->addTotalRowsApprox(total_rows);
|
||||
|
||||
pipes.emplace_back(std::move(source));
|
||||
|
||||
/// Add a special dependency transform which will be connected later with
|
||||
/// all RemoteSources through a simple scheduler (ResizeProcessor)
|
||||
if (context->getClientInfo().interface == ClientInfo::Interface::LOCAL)
|
||||
{
|
||||
pipes.back().addSimpleTransform([&](const Block & header) -> ProcessorPtr
|
||||
{
|
||||
return std::make_shared<ReadFromMergeTreeDependencyTransform>(header, context->getParallelReplicasGroupUUID());
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
@ -749,12 +737,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
|
||||
.colums_to_read = column_names
|
||||
};
|
||||
|
||||
/// We have a special logic for local replica. It has to read less data, because in some cases it should
|
||||
/// merge states of aggregate functions or do some other important stuff other than reading from Disk.
|
||||
auto is_local_replica = context->getClientInfo().interface == ClientInfo::Interface::LOCAL;
|
||||
auto min_marks_for_concurrent_read = info.min_marks_for_concurrent_read;
|
||||
if (!is_local_replica)
|
||||
min_marks_for_concurrent_read = static_cast<size_t>(min_marks_for_concurrent_read * settings.parallel_replicas_single_task_marks_count_multiplier);
|
||||
min_marks_for_concurrent_read = static_cast<size_t>(min_marks_for_concurrent_read * settings.parallel_replicas_single_task_marks_count_multiplier);
|
||||
|
||||
pool = std::make_shared<MergeTreeInOrderReadPoolParallelReplicas>(
|
||||
parts_with_ranges,
|
||||
@ -1553,18 +1537,10 @@ Pipe ReadFromMergeTree::spreadMarkRanges(
|
||||
column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()), column_names_to_read.end());
|
||||
}
|
||||
|
||||
/// Construct a proper coordinator
|
||||
if (input_order_info && is_parallel_reading_from_replicas && context->getClientInfo().interface == ClientInfo::Interface::LOCAL)
|
||||
{
|
||||
assert(context->parallel_reading_coordinator);
|
||||
auto mode = input_order_info->direction == 1 ? CoordinationMode::WithOrder : CoordinationMode::ReverseOrder;
|
||||
context->parallel_reading_coordinator->setMode(mode);
|
||||
}
|
||||
|
||||
if (final)
|
||||
{
|
||||
if (is_parallel_reading_from_replicas)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Final modifier is not supported with parallel replicas");
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas");
|
||||
|
||||
if (output_each_partition_through_separate_port)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Optimisation isn't supposed to be used for queries with final");
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <Processors/Sources/DelayedSource.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include "Common/logger_useful.h"
|
||||
#include <Common/checkStackSize.h>
|
||||
@ -270,7 +269,7 @@ void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const B
|
||||
|
||||
ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
|
||||
ASTPtr query_ast_,
|
||||
Cluster::ShardInfo shard_info_,
|
||||
ClusterPtr cluster_,
|
||||
ParallelReplicasReadingCoordinatorPtr coordinator_,
|
||||
Block header_,
|
||||
QueryProcessingStage::Enum stage_,
|
||||
@ -281,10 +280,9 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
|
||||
Scalars scalars_,
|
||||
Tables external_tables_,
|
||||
Poco::Logger * log_,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits_,
|
||||
UUID uuid_)
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits_)
|
||||
: ISourceStep(DataStream{.header = std::move(header_)})
|
||||
, shard_info(shard_info_)
|
||||
, cluster(cluster_)
|
||||
, query_ast(query_ast_)
|
||||
, coordinator(std::move(coordinator_))
|
||||
, stage(std::move(stage_))
|
||||
@ -296,13 +294,11 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
|
||||
, external_tables{external_tables_}
|
||||
, storage_limits(std::move(storage_limits_))
|
||||
, log(log_)
|
||||
, uuid(uuid_)
|
||||
{
|
||||
std::vector<String> description;
|
||||
|
||||
for (const auto & address : shard_info.all_addresses)
|
||||
if (!address.is_local)
|
||||
description.push_back(fmt::format("Replica: {}", address.host_name));
|
||||
for (const auto & address : cluster->getShardsAddresses())
|
||||
description.push_back(fmt::format("Replica: {}", address[0].host_name));
|
||||
|
||||
setStepDescription(boost::algorithm::join(description, ", "));
|
||||
}
|
||||
@ -324,42 +320,49 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
|
||||
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
|
||||
|
||||
size_t all_replicas_count = current_settings.max_parallel_replicas;
|
||||
if (all_replicas_count > shard_info.all_addresses.size())
|
||||
if (all_replicas_count > cluster->getShardsInfo().size())
|
||||
{
|
||||
LOG_INFO(&Poco::Logger::get("ReadFromParallelRemoteReplicasStep"),
|
||||
"The number of replicas requested ({}) is bigger than the real number available in the cluster ({}). "\
|
||||
"Will use the latter number to execute the query.", current_settings.max_parallel_replicas, shard_info.all_addresses.size());
|
||||
all_replicas_count = shard_info.all_addresses.size();
|
||||
"Will use the latter number to execute the query.", current_settings.max_parallel_replicas, cluster->getShardsInfo().size());
|
||||
all_replicas_count = cluster->getShardsInfo().size();
|
||||
}
|
||||
|
||||
/// The requested number of replicas to read from could be less
|
||||
/// than the total number of replicas in the shard
|
||||
/// And we have to pick only "remote" ones
|
||||
/// So, that's why this loop looks like this.
|
||||
size_t replica_num = 0;
|
||||
while (pipes.size() != all_replicas_count - 1)
|
||||
/// Find local shard
|
||||
for (const auto & shard: cluster->getShardsInfo())
|
||||
{
|
||||
if (shard_info.all_addresses[replica_num].is_local)
|
||||
if (shard.isLocal())
|
||||
{
|
||||
++replica_num;
|
||||
IConnections::ReplicaInfo replica_info
|
||||
{
|
||||
.all_replicas_count = all_replicas_count,
|
||||
.number_of_current_replica = 0
|
||||
};
|
||||
|
||||
addPipeForSingeReplica(pipes, shard.pool, replica_info);
|
||||
}
|
||||
}
|
||||
|
||||
if (pipes.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "No local shard");
|
||||
|
||||
auto current_shard = cluster->getShardsInfo().begin();
|
||||
while (pipes.size() != all_replicas_count)
|
||||
{
|
||||
if (current_shard->isLocal())
|
||||
{
|
||||
++current_shard;
|
||||
continue;
|
||||
}
|
||||
|
||||
IConnections::ReplicaInfo replica_info
|
||||
{
|
||||
.all_replicas_count = all_replicas_count,
|
||||
/// Replica 0 is treated as local always
|
||||
.number_of_current_replica = pipes.size() + 1
|
||||
.number_of_current_replica = pipes.size()
|
||||
};
|
||||
|
||||
auto pool = shard_info.per_replica_pools[replica_num];
|
||||
assert(pool);
|
||||
|
||||
auto pool_with_failover = std::make_shared<ConnectionPoolWithFailover>(
|
||||
ConnectionPoolPtrs{pool}, current_settings.load_balancing);
|
||||
|
||||
addPipeForSingeReplica(pipes, std::move(pool_with_failover), replica_info);
|
||||
++replica_num;
|
||||
addPipeForSingeReplica(pipes, current_shard->pool, replica_info);
|
||||
++current_shard;
|
||||
}
|
||||
|
||||
auto pipe = Pipe::unitePipes(std::move(pipes));
|
||||
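With this change the local shard is always numbered replica 0 and each remote shard receives the next consecutive number as its pipe is appended, capped by the requested replica count. A tiny self-contained sketch of that numbering follows; ShardStub and its is_local flag are made-up stand-ins, not the real Cluster/ShardInfo API.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

/// Sketch only: the local shard first (replica 0), then remotes in order,
/// each getting the current pipe count as its replica number.
struct ShardStub { std::string host; bool is_local = false; };

int main()
{
    std::vector<ShardStub> shards{{"replica1", false}, {"localhost", true}, {"replica3", false}};
    const size_t all_replicas_count = std::min<size_t>(3, shards.size());

    std::vector<std::pair<std::string, size_t>> pipes; /// (host, replica number)
    for (const auto & shard : shards)                  /// local shard always becomes replica 0
        if (shard.is_local)
            pipes.emplace_back(shard.host, 0);

    for (const auto & shard : shards)                  /// then the remote ones
    {
        if (pipes.size() == all_replicas_count)
            break;
        if (!shard.is_local)
            pipes.emplace_back(shard.host, pipes.size());
    }

    for (const auto & [host, num] : pipes)
        std::cout << host << " -> replica " << num << "\n";
}
```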
@ -396,7 +399,7 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(Pipes & pipes, s
|
||||
|
||||
remote_query_executor->setLogger(log);
|
||||
|
||||
pipes.emplace_back(createRemoteSourcePipe(std::move(remote_query_executor), add_agg_info, add_totals, add_extremes, async_read, uuid));
|
||||
pipes.emplace_back(createRemoteSourcePipe(std::move(remote_query_executor), add_agg_info, add_totals, add_extremes, async_read));
|
||||
addConvertingActions(pipes.back(), output_stream->header);
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ class ReadFromParallelRemoteReplicasStep : public ISourceStep
|
||||
public:
|
||||
ReadFromParallelRemoteReplicasStep(
|
||||
ASTPtr query_ast_,
|
||||
Cluster::ShardInfo shard_info,
|
||||
ClusterPtr cluster_,
|
||||
ParallelReplicasReadingCoordinatorPtr coordinator_,
|
||||
Block header_,
|
||||
QueryProcessingStage::Enum stage_,
|
||||
@ -77,8 +77,7 @@ public:
|
||||
Scalars scalars_,
|
||||
Tables external_tables_,
|
||||
Poco::Logger * log_,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits_,
|
||||
UUID uuid);
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits_);
|
||||
|
||||
String getName() const override { return "ReadFromRemoteParallelReplicas"; }
|
||||
|
||||
@ -91,7 +90,7 @@ private:
|
||||
|
||||
void addPipeForSingeReplica(Pipes & pipes, std::shared_ptr<ConnectionPoolWithFailover> pool, IConnections::ReplicaInfo replica_info);
|
||||
|
||||
Cluster::ShardInfo shard_info;
|
||||
ClusterPtr cluster;
|
||||
ASTPtr query_ast;
|
||||
ParallelReplicasReadingCoordinatorPtr coordinator;
|
||||
QueryProcessingStage::Enum stage;
|
||||
@ -101,10 +100,8 @@ private:
|
||||
ThrottlerPtr throttler;
|
||||
Scalars scalars;
|
||||
Tables external_tables;
|
||||
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits;
|
||||
Poco::Logger * log;
|
||||
UUID uuid;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -14,11 +14,10 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_, UUID uuid_)
|
||||
RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_)
|
||||
: ISource(executor->getHeader(), false)
|
||||
, add_aggregation_info(add_aggregation_info_), query_executor(std::move(executor))
|
||||
, async_read(async_read_)
|
||||
, uuid(uuid_)
|
||||
{
|
||||
/// Add AggregatedChunkInfo if we expect DataTypeAggregateFunction as a result.
|
||||
const auto & sample = getPort().getHeader();
|
||||
@ -29,18 +28,6 @@ RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation
|
||||
|
||||
RemoteSource::~RemoteSource() = default;
|
||||
|
||||
void RemoteSource::connectToScheduler(InputPort & input_port)
|
||||
{
|
||||
outputs.emplace_back(Block{}, this);
|
||||
dependency_port = &outputs.back();
|
||||
connect(*dependency_port, input_port);
|
||||
}
|
||||
|
||||
UUID RemoteSource::getParallelReplicasGroupUUID()
|
||||
{
|
||||
return uuid;
|
||||
}
|
||||
|
||||
void RemoteSource::setStorageLimits(const std::shared_ptr<const StorageLimitsList> & storage_limits_)
|
||||
{
|
||||
/// Remove leaf limits for remote source.
|
||||
@ -69,21 +56,10 @@ ISource::Status RemoteSource::prepare()
|
||||
if (status == Status::Finished)
|
||||
{
|
||||
query_executor->finish(&read_context);
|
||||
if (dependency_port)
|
||||
dependency_port->finish();
|
||||
is_async_state = false;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
if (status == Status::PortFull || status == Status::Ready)
|
||||
{
|
||||
/// Also push empty chunk to dependency to signal that we read data from remote source
|
||||
/// or answered to the incoming request from parallel replica
|
||||
if (dependency_port && !dependency_port->isFinished() && dependency_port->canPush())
|
||||
dependency_port->push(Chunk());
|
||||
}
|
||||
|
||||
return status;
|
||||
}

@ -231,9 +207,9 @@ Chunk RemoteExtremesSource::generate()

Pipe createRemoteSourcePipe(
    RemoteQueryExecutorPtr query_executor,
    bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read, UUID uuid)
    bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read)
{
    Pipe pipe(std::make_shared<RemoteSource>(query_executor, add_aggregation_info, async_read, uuid));
    Pipe pipe(std::make_shared<RemoteSource>(query_executor, add_aggregation_info, async_read));

    if (add_totals)
        pipe.addTotalsSource(std::make_shared<RemoteTotalsSource>(query_executor));

@ -21,18 +21,14 @@ public:
    /// Flag add_aggregation_info tells if AggregatedChunkInfo should be added to result chunk.
    /// AggregatedChunkInfo stores the bucket number used for two-level aggregation.
    /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState.
    RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_, UUID uuid = UUIDHelpers::Nil);
    RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_);
    ~RemoteSource() override;

    Status prepare() override;
    String getName() const override { return "Remote"; }

    void connectToScheduler(InputPort & input_port);

    void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); }

    UUID getParallelReplicasGroupUUID();

    /// Stop reading from stream if output port is finished.
    void onUpdatePorts() override;

@ -51,12 +47,10 @@ private:
    RemoteQueryExecutorPtr query_executor;
    RowsBeforeLimitCounterPtr rows_before_limit;

    OutputPort * dependency_port{nullptr};

    const bool async_read;
    bool is_async_state = false;
    std::unique_ptr<RemoteQueryExecutorReadContext> read_context;
    UUID uuid;

    int fd = -1;
    size_t rows = 0;
    bool manually_add_rows_before_limit_counter = false;
@ -97,6 +91,6 @@ private:
/// Create pipe with remote sources.
Pipe createRemoteSourcePipe(
    RemoteQueryExecutorPtr query_executor,
    bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read, UUID uuid = UUIDHelpers::Nil);
    bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read);

}

@ -1,103 +0,0 @@
#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>

#include <QueryPipeline/RemoteQueryExecutor.h>
#include "Processors/Port.h"

namespace DB
{

ReadFromMergeTreeDependencyTransform::ReadFromMergeTreeDependencyTransform(const Block & header, UUID uuid_)
    : IProcessor(InputPorts(1, header), OutputPorts(1, header))
    , uuid(uuid_)
    , data_port(&inputs.front())
{
}

void ReadFromMergeTreeDependencyTransform::connectToScheduler(OutputPort & output_port)
{
    inputs.emplace_back(Block{}, this);
    dependency_port = &inputs.back();
    connect(output_port, *dependency_port);
}

UUID ReadFromMergeTreeDependencyTransform::getParallelReplicasGroupUUID()
{
    return uuid;
}

IProcessor::Status ReadFromMergeTreeDependencyTransform::prepare()
{
    Status status = Status::Ready;

    while (status == Status::Ready)
    {
        status = !has_data ? prepareConsume()
                           : prepareGenerate();
    }

    return status;
}

IProcessor::Status ReadFromMergeTreeDependencyTransform::prepareConsume()
{
    auto & output_port = getOutputPort();

    /// Check all outputs are finished or ready to get data.
    if (output_port.isFinished())
    {
        data_port->close();
        dependency_port->close();
        return Status::Finished;
    }

    /// Try get chunk from input.
    if (data_port->isFinished())
    {
        if (dependency_port->hasData())
            dependency_port->pull(true);
        dependency_port->close();
        output_port.finish();
        return Status::Finished;
    }

    if (!dependency_port->isFinished())
    {
        dependency_port->setNeeded();
        if (!dependency_port->hasData())
            return Status::NeedData;
    }

    data_port->setNeeded();
    if (!data_port->hasData())
        return Status::NeedData;

    if (!dependency_port->isFinished())
        dependency_port->pull();

    chunk = data_port->pull();
    has_data = true;

    return Status::Ready;
}

IProcessor::Status ReadFromMergeTreeDependencyTransform::prepareGenerate()
{
    auto & output_port = getOutputPort();
    if (!output_port.isFinished() && output_port.canPush())
    {
        output_port.push(std::move(chunk));
        has_data = false;
        return Status::Ready;
    }

    if (output_port.isFinished())
    {
        data_port->close();
        dependency_port->close();
        return Status::Finished;
    }

    return Status::PortFull;
}

}

@ -1,48 +0,0 @@
#pragma once
#include <Processors/IProcessor.h>

namespace DB
{

class RemoteQueryExecutor;
using RemoteQueryExecutorPtr = std::shared_ptr<RemoteQueryExecutor>;

/// A tiny class which is used for reading with multiple replicas in parallel.
/// Motivation is that we don't have a full control on how
/// processors are scheduled across threads and there could be a situation
/// when all available threads will read from local replica and will just
/// forget about remote replicas existence. That is not what we want.
/// For parallel replicas we have to constantly answer to incoming requests
/// with a set of marks to read.
/// With the help of this class, we explicitly connect a "local" source with
/// all the remote ones. And thus achieve fairness somehow.
class ReadFromMergeTreeDependencyTransform : public IProcessor
{
public:
    ReadFromMergeTreeDependencyTransform(const Block & header, UUID uuid_);

    String getName() const override { return "ReadFromMergeTreeDependency"; }
    Status prepare() override;

    InputPort & getInputPort() { assert(data_port); return *data_port; }
    InputPort & getDependencyPort() { assert(dependency_port); return *dependency_port; }
    OutputPort & getOutputPort() { return outputs.front(); }

    UUID getParallelReplicasGroupUUID();

    void connectToScheduler(OutputPort & output_port);
private:
    bool has_data{false};
    Chunk chunk;

    UUID uuid;

    InputPort * data_port{nullptr};
    InputPort * dependency_port{nullptr};

    Status prepareGenerate();
    Status prepareConsume();
};


}
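The deleted transform above drives all of its work from prepare(), looping between a consume phase (pull from the data and dependency inputs) and a generate phase (push the buffered chunk) until neither side can make progress. A self-contained sketch of that two-phase state machine, using plain queues instead of ClickHouse ports; every name below is illustrative:

```cpp
#include <deque>
#include <iostream>
#include <optional>

enum class Status { NeedData, PortFull, Ready, Finished };

// Toy replacement for InputPort/OutputPort: an unbounded-ish queue of ints.
struct ToyPort
{
    std::deque<int> q;
    bool finished = false;
    size_t capacity = 1;

    bool hasData() const { return !q.empty(); }
    bool canPush() const { return q.size() < capacity; }
};

class DependencyTransform
{
public:
    DependencyTransform(ToyPort & in, ToyPort & out) : input(in), output(out) {}

    // Same control flow as the deleted prepare(): alternate between
    // prepareConsume() and prepareGenerate() while either returns Ready.
    Status prepare()
    {
        Status status = Status::Ready;
        while (status == Status::Ready)
            status = !has_data ? prepareConsume() : prepareGenerate();
        return status;
    }

private:
    Status prepareConsume()
    {
        if (output.finished)
            return Status::Finished;
        if (input.finished && !input.hasData())
        {
            output.finished = true;
            return Status::Finished;
        }
        if (!input.hasData())
            return Status::NeedData;
        chunk = input.q.front();
        input.q.pop_front();
        has_data = true;
        return Status::Ready;
    }

    Status prepareGenerate()
    {
        if (output.finished)
            return Status::Finished;
        if (!output.canPush())
            return Status::PortFull;
        output.q.push_back(*chunk);
        chunk.reset();
        has_data = false;
        return Status::Ready;
    }

    ToyPort & input;
    ToyPort & output;
    std::optional<int> chunk;
    bool has_data = false;
};

int main()
{
    ToyPort in, out;
    out.capacity = 100;
    in.q = {10, 20, 30};
    in.finished = true;

    DependencyTransform transform(in, out);
    while (transform.prepare() != Status::Finished) {}

    for (int v : out.q)
        std::cout << v << ' ';
    std::cout << '\n'; // prints: 10 20 30
}
```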

@ -1981,7 +1981,7 @@ struct WindowFunctionNtile final : public WindowFunction
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} takes exactly one parameter", name_);
        }
        auto type_id = argument_types[0]->getTypeId();
        if (type_id != TypeIndex::UInt8 && type_id != TypeIndex::UInt16 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt64)
        if (type_id != TypeIndex::UInt8 && type_id != TypeIndex::UInt16 && type_id != TypeIndex::UInt32 && type_id != TypeIndex::UInt64)
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "ntile's argument type must be an unsigned integer (not larger then 64-bit), but got {}", argument_types[0]->getName());
        }

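The ntile hunk above removes a copy-paste slip: the old condition tested `TypeIndex::UInt32` twice, which changed nothing about the result but left a redundant comparison in place. Writing the check as membership in a small list makes that kind of duplication obvious at a glance. A hedged sketch of that alternative, with a stand-in TypeIndex enum rather than the real ClickHouse one:

```cpp
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>

// Stand-in for the real TypeIndex; only what the example needs.
enum class TypeIndex { UInt8, UInt16, UInt32, UInt64, Int32, String };

// "One of these types" expressed as a list instead of a chain of != comparisons.
bool isAllowedNtileArgument(TypeIndex type_id)
{
    static constexpr std::array<TypeIndex, 4> allowed
        {TypeIndex::UInt8, TypeIndex::UInt16, TypeIndex::UInt32, TypeIndex::UInt64};
    return std::find(allowed.begin(), allowed.end(), type_id) != allowed.end();
}

void checkNtileArgument(TypeIndex type_id, const std::string & type_name)
{
    if (!isAllowedNtileArgument(type_id))
        throw std::invalid_argument(
            "ntile's argument type must be an unsigned integer (not larger than 64-bit), but got " + type_name);
}

int main()
{
    checkNtileArgument(TypeIndex::UInt16, "UInt16"); // accepted
    try
    {
        checkNtileArgument(TypeIndex::Int32, "Int32"); // rejected
    }
    catch (const std::invalid_argument & e)
    {
        std::cout << e.what() << '\n';
    }
}
```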
@ -27,7 +27,6 @@
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Transforms/ReadFromMergeTreeDependencyTransform.h>
#include <Processors/Transforms/TotalsHavingTransform.h>
#include <QueryPipeline/narrowPipe.h>

@ -621,65 +620,6 @@ void QueryPipelineBuilder::setProgressCallback(ProgressCallback callback)
    progress_callback = callback;
}

void QueryPipelineBuilder::connectDependencies()
{
    /**
     * This is needed because among all RemoteSources there could be
     * one or several that don't belong to the parallel replicas reading process.
     * It could happen for example if we read through distributed table + prefer_localhost_replica=1 + parallel replicas
     * SELECT * FROM remote('127.0.0.{1,2}', table.merge_tree)
     * Will generate a local pipeline and a remote source. For local pipeline because of parallel replicas we will create
     * several processors to read and several remote sources.
     */
    std::set<UUID> all_parallel_replicas_groups;
    for (auto & processor : *pipe.getProcessorsPtr())
    {
        if (auto * remote_dependency = typeid_cast<RemoteSource *>(processor.get()); remote_dependency)
            if (auto uuid = remote_dependency->getParallelReplicasGroupUUID(); uuid != UUIDHelpers::Nil)
                all_parallel_replicas_groups.insert(uuid);
        if (auto * merge_tree_dependency = typeid_cast<ReadFromMergeTreeDependencyTransform *>(processor.get()); merge_tree_dependency)
            if (auto uuid = merge_tree_dependency->getParallelReplicasGroupUUID(); uuid != UUIDHelpers::Nil)
                all_parallel_replicas_groups.insert(uuid);
    }

    for (const auto & group_id : all_parallel_replicas_groups)
    {
        std::vector<RemoteSource *> input_dependencies;
        std::vector<ReadFromMergeTreeDependencyTransform *> output_dependencies;

        for (auto & processor : *pipe.getProcessorsPtr())
        {
            if (auto * remote_dependency = typeid_cast<RemoteSource *>(processor.get()); remote_dependency)
                if (auto uuid = remote_dependency->getParallelReplicasGroupUUID(); uuid == group_id)
                    input_dependencies.emplace_back(remote_dependency);
            if (auto * merge_tree_dependency = typeid_cast<ReadFromMergeTreeDependencyTransform *>(processor.get()); merge_tree_dependency)
                if (auto uuid = merge_tree_dependency->getParallelReplicasGroupUUID(); uuid == group_id)
                    output_dependencies.emplace_back(merge_tree_dependency);
        }

        if (input_dependencies.empty() || output_dependencies.empty())
            continue;

        auto input_dependency_iter = input_dependencies.begin();
        auto output_dependency_iter = output_dependencies.begin();
        auto scheduler = std::make_shared<ResizeProcessor>(Block{}, input_dependencies.size(), output_dependencies.size());

        for (auto & scheduler_input : scheduler->getInputs())
        {
            (*input_dependency_iter)->connectToScheduler(scheduler_input);
            ++input_dependency_iter;
        }

        for (auto & scheduler_output : scheduler->getOutputs())
        {
            (*output_dependency_iter)->connectToScheduler(scheduler_output);
            ++output_dependency_iter;
        }

        pipe.getProcessorsPtr()->emplace_back(std::move(scheduler));
    }
}
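connectDependencies() above does three things: collect the UUIDs of all parallel-replica groups present in the pipeline, gather the RemoteSource and ReadFromMergeTreeDependencyTransform endpoints belonging to each group, and wire N such inputs to M such outputs through one shared ResizeProcessor-style hub. A self-contained sketch of that grouping-and-wiring step with toy endpoint types; the Hub below is only an illustration, not the real ResizeProcessor:

```cpp
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using UUID = std::string; // toy stand-in; the empty string plays the role of UUIDHelpers::Nil

struct Endpoint
{
    UUID group;      // which parallel-replica group this endpoint belongs to
    bool is_input;   // true: feeds the hub (RemoteSource side), false: consumes from it
    std::string name;
};

// Toy counterpart of the ResizeProcessor: only records how it was wired.
struct Hub
{
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
};

std::map<UUID, Hub> connectDependencies(const std::vector<Endpoint> & processors)
{
    // Pass 1: find every group id that actually occurs (skipping "Nil").
    std::set<UUID> groups;
    for (const auto & p : processors)
        if (!p.group.empty())
            groups.insert(p.group);

    // Pass 2: per group, gather both sides and create one hub for them.
    std::map<UUID, Hub> hubs;
    for (const auto & group_id : groups)
    {
        Hub hub;
        for (const auto & p : processors)
        {
            if (p.group != group_id)
                continue;
            (p.is_input ? hub.inputs : hub.outputs).push_back(p.name);
        }
        // Mirrors the `input_dependencies.empty() || output_dependencies.empty()` guard above.
        if (hub.inputs.empty() || hub.outputs.empty())
            continue;
        hubs.emplace(group_id, std::move(hub));
    }
    return hubs;
}

int main()
{
    std::vector<Endpoint> processors = {
        {"group-1", true, "RemoteSource#1"},
        {"group-1", true, "RemoteSource#2"},
        {"group-1", false, "MergeTreeDependency#1"},
        {"", true, "RemoteSource outside parallel replicas"}, // ignored, Nil group
    };

    for (const auto & [group, hub] : connectDependencies(processors))
        std::cout << group << ": " << hub.inputs.size() << " inputs -> "
                  << hub.outputs.size() << " outputs via one hub\n";
}
```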

PipelineExecutorPtr QueryPipelineBuilder::execute()
{
    if (!isCompleted())

@ -140,12 +140,6 @@ public:

    void addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context);

    /// Finds all processors for reading from MergeTree
    /// And explicitly connects them with all RemoteSources
    /// using a ResizeProcessor. This is needed not to let
    /// the RemoteSource to starve for CPU time
    void connectDependencies();

    PipelineExecutorPtr execute();

    size_t getNumStreams() const { return pipe.numOutputPorts(); }

@ -8,13 +6,
#include <IO/Operators.h>
#include <IO/WriteBufferFromFile.h>

namespace
{

namespace fs = std::filesystem;

}

namespace CurrentMetrics
{
    extern const Metric DistributedSend;
@ -140,7 +133,7 @@ void DistributedAsyncInsertBatch::send()
    total_bytes = 0;
    recovered = false;

    fs::resize_file(parent.current_batch_file_path, 0);
    std::filesystem::resize_file(parent.current_batch_file_path, 0);
}

void DistributedAsyncInsertBatch::serialize()
@ -149,7 +142,7 @@ void DistributedAsyncInsertBatch::serialize()
    String tmp_file{parent.current_batch_file_path + ".tmp"};

    auto dir_sync_guard = parent.getDirectorySyncGuard(parent.relative_path);
    if (fs::exists(tmp_file))
    if (std::filesystem::exists(tmp_file))
        LOG_ERROR(parent.log, "Temporary file {} exists. Unclean shutdown?", backQuote(tmp_file));

    {
@ -161,7 +154,7 @@ void DistributedAsyncInsertBatch::serialize()
        out.sync();
    }

    fs::rename(tmp_file, parent.current_batch_file_path);
    std::filesystem::rename(tmp_file, parent.current_batch_file_path);
}
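serialize() above follows the usual crash-safe pattern: write the new batch file under a `.tmp` name, sync it, then rename it over the old file, warning if a stale temporary is left over from an unclean shutdown. A minimal standalone sketch of that pattern with std::filesystem and std::ofstream; the paths and logging below are illustrative, not the DistributedAsyncInsertBatch API:

```cpp
#include <filesystem>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

// Write `contents` to `target` by going through a temporary file and renaming it
// into place, so readers never observe a half-written file.
void writeFileAtomically(const fs::path & target, const std::string & contents)
{
    const fs::path tmp_file = target.string() + ".tmp";

    // A leftover .tmp usually means the previous run died mid-write.
    if (fs::exists(tmp_file))
        std::cerr << "Temporary file " << tmp_file << " exists. Unclean shutdown?\n";

    {
        std::ofstream out(tmp_file, std::ios::binary | std::ios::trunc);
        out << contents;
        out.flush(); // the real code additionally syncs the file and its directory
        if (!out)
            throw std::runtime_error("Cannot write " + tmp_file.string());
    }

    // On POSIX, renaming over an existing file is atomic.
    fs::rename(tmp_file, target);
}

int main()
{
    const fs::path target = fs::temp_directory_path() / "current_batch.txt";
    writeFileAtomically(target, "1\n2\n3\n");
    std::cout << "wrote " << fs::file_size(target) << " bytes to " << target << '\n';
}
```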

void DistributedAsyncInsertBatch::deserialize()
@ -174,7 +167,7 @@ void DistributedAsyncInsertBatch::writeText(WriteBuffer & out)
{
    for (const auto & file : files)
    {
        UInt64 file_index = parse<UInt64>(fs::path(file).stem());
        UInt64 file_index = parse<UInt64>(std::filesystem::path(file).stem());
        out << file_index << '\n';
    }
}
@ -185,7 +178,7 @@ void DistributedAsyncInsertBatch::readText(ReadBuffer & in)
    {
        UInt64 idx;
        in >> idx >> "\n";
        files.push_back(fs::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string());
        files.push_back(std::filesystem::absolute(fmt::format("{}/{}.bin", parent.path, idx)).string());
    }

    recovered = true;

@ -5,6 +5,7 @@
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <Common/logger_useful.h>
#include <Interpreters/Context.h>
#include <Storages/MergeTree/localBackup.h>
#include <Backups/BackupEntryFromSmallFile.h>
#include <Backups/BackupEntryFromImmutableFile.h>
@ -311,6 +312,7 @@ DataPartStorageOnDiskBase::getReplicatedFilesDescriptionForRemoteDisk(const Name
}

void DataPartStorageOnDiskBase::backup(
    const ReadSettings & read_settings,
    const MergeTreeDataPartChecksums & checksums,
    const NameSet & files_without_checksums,
    const String & path_in_backup,
@ -386,7 +388,7 @@ void DataPartStorageOnDiskBase::backup(

        backup_entries.emplace_back(
            filepath_in_backup,
            std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, file_size, file_hash, temp_dir_owner));
            std::make_unique<BackupEntryFromImmutableFile>(disk, filepath_on_disk, read_settings, file_size, file_hash, temp_dir_owner));
    }
}

@ -49,6 +49,7 @@ public:
    ReplicatedFilesDescription getReplicatedFilesDescriptionForRemoteDisk(const NameSet & file_names) const override;

    void backup(
        const ReadSettings & read_settings,
        const MergeTreeDataPartChecksums & checksums,
        const NameSet & files_without_checksums,
        const String & path_in_backup,

@ -197,6 +197,7 @@ public:
    /// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time).
    using TemporaryFilesOnDisks = std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>>;
    virtual void backup(
        const ReadSettings & read_settings,
        const MergeTreeDataPartChecksums & checksums,
        const NameSet & files_without_checksums,
        const String & path_in_backup,

@ -4869,24 +4869,12 @@ Pipe MergeTreeData::alterPartition(
}


void MergeTreeData::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions)
{
    auto local_context = backup_entries_collector.getContext();

    DataPartsVector data_parts;
    if (partitions)
        data_parts = getVisibleDataPartsVectorInPartitions(local_context, getPartitionIDsFromQuery(*partitions, local_context));
    else
        data_parts = getVisibleDataPartsVector(local_context);

    backup_entries_collector.addBackupEntries(backupParts(data_parts, data_path_in_backup, local_context));
}

BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, const String & data_path_in_backup, const ContextPtr & local_context)
{
    BackupEntries backup_entries;
    std::map<DiskPtr, std::shared_ptr<TemporaryFileOnDisk>> temp_dirs;
    TableLockHolder table_lock;
    ReadSettings read_settings = local_context->getBackupReadSettings();

    for (const auto & part : data_parts)
    {
@ -4916,6 +4904,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con

        BackupEntries backup_entries_from_part;
        part->getDataPartStorage().backup(
            read_settings,
            part->checksums,
            part->getFileNamesWithoutChecksums(),
            data_path_in_backup,
@ -4927,6 +4916,7 @@ BackupEntries MergeTreeData::backupParts(const DataPartsVector & data_parts, con
        for (const auto & [projection_name, projection_part] : projection_parts)
        {
            projection_part->getDataPartStorage().backup(
                read_settings,
                projection_part->checksums,
                projection_part->getFileNamesWithoutChecksums(),
                fs::path{data_path_in_backup} / part->name,
@ -6939,8 +6929,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
    if (query_context->getClientInfo().collaborate_with_initiator)
        return QueryProcessingStage::Enum::FetchColumns;

    if (query_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas
        && !query_context->getClientInfo().collaborate_with_initiator
    if (query_context->canUseParallelReplicasOnInitiator()
        && to_stage >= QueryProcessingStage::WithMergeableState)
        return QueryProcessingStage::Enum::WithMergeableState;
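The getQueryProcessingStage() change above folds the inline settings-and-client-info test into a single named predicate on the context, `canUseParallelReplicasOnInitiator()`, so every call site asks the same question the same way. A hedged sketch of that refactoring direction; apart from the identifiers visible in the hunk, the member names below are assumptions made for the example:

```cpp
#include <iostream>

// Toy context holding just the two facts the old condition looked at.
struct QueryContext
{
    bool allow_experimental_parallel_reading_from_replicas = false;
    bool collaborate_with_initiator = false; // set on secondary replicas, not on the initiator

    // Named predicate replacing the inline "setting enabled && not a secondary replica" check.
    bool canUseParallelReplicasOnInitiator() const
    {
        return allow_experimental_parallel_reading_from_replicas && !collaborate_with_initiator;
    }
};

enum class Stage { FetchColumns, WithMergeableState, Complete };

Stage getQueryProcessingStage(const QueryContext & ctx, Stage to_stage)
{
    if (ctx.collaborate_with_initiator)
        return Stage::FetchColumns;

    if (ctx.canUseParallelReplicasOnInitiator() && to_stage >= Stage::WithMergeableState)
        return Stage::WithMergeableState;

    return Stage::Complete;
}

int main()
{
    QueryContext initiator;
    initiator.allow_experimental_parallel_reading_from_replicas = true;

    QueryContext replica = initiator;
    replica.collaborate_with_initiator = true;

    std::cout << (getQueryProcessingStage(initiator, Stage::Complete) == Stage::WithMergeableState) << '\n'; // 1
    std::cout << (getQueryProcessingStage(replica, Stage::Complete) == Stage::FetchColumns) << '\n';         // 1
}
```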

@ -751,9 +751,6 @@ public:
        ContextPtr context,
        TableLockHolder & table_lock_holder);

    /// Makes backup entries to backup the data of the storage.
    void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;

    /// Extract data from the backup and put it to the storage.
    void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional<ASTs> & partitions) override;

@ -9,6 +9,7 @@

#include <consistent_hashing.h>

#include "Common/Exception.h"
#include <Common/logger_useful.h>
#include <Common/SipHash.h>
#include <Common/thread_local_rng.h>
@ -47,7 +48,6 @@ public:
    }

    Stats stats;
    std::mutex mutex;
    size_t replicas_count;

    explicit ImplInterface(size_t replicas_count_)
@ -220,9 +220,11 @@ void DefaultCoordinator::finalizeReadingState()

void DefaultCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
{
    std::lock_guard lock(mutex);

    updateReadingState(announcement);

    if (announcement.replica_num >= stats.size())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Replica number ({}) is bigger than total replicas count ({})", announcement.replica_num, stats.size());

    stats[announcement.replica_num].number_of_requests +=1;

    ++sent_initial_requests;
@ -282,8 +284,6 @@ void DefaultCoordinator::selectPartsAndRanges(const PartRefs & container, size_t

ParallelReadResponse DefaultCoordinator::handleRequest(ParallelReadRequest request)
{
    std::lock_guard lock(mutex);

    LOG_TRACE(log, "Handling request from replica {}, minimal marks size is {}", request.replica_num, request.min_number_of_marks);

    size_t current_mark_size = 0;
@ -349,7 +349,6 @@ public:
template <CoordinationMode mode>
void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
{
    std::lock_guard lock(mutex);
    LOG_TRACE(log, "Received an announecement {}", announcement.describe());

    /// To get rid of duplicates
@ -387,8 +386,6 @@ void InOrderCoordinator<mode>::handleInitialAllRangesAnnouncement(InitialAllRang
template <CoordinationMode mode>
ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest request)
{
    std::lock_guard lock(mutex);

    if (request.mode != mode)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Replica {} decided to read in {} mode, not in {}. This is a bug",
@ -479,16 +476,27 @@ ParallelReadResponse InOrderCoordinator<mode>::handleRequest(ParallelReadRequest

void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement announcement)
{
    std::lock_guard lock(mutex);

    if (!pimpl)
    {
        setMode(announcement.mode);
        initialize();
    }


    return pimpl->handleInitialAllRangesAnnouncement(announcement);
}

ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request)
{
    std::lock_guard lock(mutex);

    if (!pimpl)
    {
        setMode(request.mode);
        initialize();
    }

    return pimpl->handleRequest(std::move(request));
}
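Both facade methods above share one shape: take the coordinator's mutex, lazily construct the mode-specific implementation on the first call that arrives, then delegate to it. A compact standalone sketch of that lazy-pimpl-under-a-lock pattern; the types and names are illustrative only:

```cpp
#include <iostream>
#include <memory>
#include <mutex>
#include <string>

enum class CoordinationMode { Default, InOrder };

struct Impl
{
    explicit Impl(CoordinationMode mode_) : mode(mode_) {}
    std::string handleRequest(const std::string & request)
    {
        return (mode == CoordinationMode::Default ? "default: " : "in-order: ") + request;
    }
    CoordinationMode mode;
};

class Coordinator
{
public:
    // The first request (or announcement) to arrive decides the mode; every
    // later call reuses the implementation built under the same mutex.
    std::string handleRequest(CoordinationMode mode, const std::string & request)
    {
        std::lock_guard lock(mutex);

        if (!pimpl)
            pimpl = std::make_unique<Impl>(mode);

        return pimpl->handleRequest(request);
    }

private:
    std::mutex mutex;
    std::unique_ptr<Impl> pimpl;
};

int main()
{
    Coordinator coordinator;
    std::cout << coordinator.handleRequest(CoordinationMode::InOrder, "read part_1") << '\n';
    // The mode of later calls does not trigger another construction: pimpl already exists.
    std::cout << coordinator.handleRequest(CoordinationMode::Default, "read part_2") << '\n';
}
```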
Some files were not shown because too many files have changed in this diff.