Merge branch 'master' into concurrent_hash_join_cache_ht_sizes

Nikita Taranov 2024-07-09 13:14:39 +02:00 committed by GitHub
commit c3c5778384
91 changed files with 2702 additions and 1187 deletions


@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh
Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
* [v24.6 Community Call](https://clickhouse.com/company/events/v24-6-community-release-call) - Jul 2
* [v24.7 Community Call](https://clickhouse.com/company/events/v24-7-community-release-call) - Jul 30
## Upcoming Events


@ -79,7 +79,7 @@ namespace Net
/// Returns the value of the first name-value pair with the given name.
/// If no value with the given name has been found, the defaultValue is returned.
const std::vector<std::reference_wrapper<const std::string>> getAll(const std::string & name) const;
std::vector<std::string> getAll(const std::string & name) const;
/// Returns all values of all name-value pairs with the given name.
///
/// Returns an empty vector if there are no name-value pairs with the given name.


@ -102,9 +102,9 @@ const std::string& NameValueCollection::get(const std::string& name, const std::
return defaultValue;
}
const std::vector<std::reference_wrapper<const std::string>> NameValueCollection::getAll(const std::string& name) const
std::vector<std::string> NameValueCollection::getAll(const std::string& name) const
{
std::vector<std::reference_wrapper<const std::string>> values;
std::vector<std::string> values;
for (ConstIterator it = _map.find(name); it != _map.end(); it++)
if (it->first == name)
values.push_back(it->second);

contrib/avro vendored

@ -1 +1 @@
Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9
Subproject commit 545e7002683cbc2198164d93088ac8e4955b4628


@ -54,7 +54,6 @@ set(SRCS
"${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c"
"${LIBPQ_SOURCE_DIR}/port/thread.c"
"${LIBPQ_SOURCE_DIR}/port/path.c"
"${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c"
)
add_library(_libpq ${SRCS})

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 3a0b80ca9d6eebb38fad7ea3f41dfc9db4f6a984
Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304


@ -1,7 +1,7 @@
option (ENABLE_ROCKSDB "Enable rocksdb library" ${ENABLE_LIBRARIES})
option (ENABLE_ROCKSDB "Enable RocksDB" ${ENABLE_LIBRARIES})
if (NOT ENABLE_ROCKSDB)
message (STATUS "Not using rocksdb")
message (STATUS "Not using RocksDB")
return()
endif()


@ -14,14 +14,13 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
echo "Chosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
# Accept failure in the next two commands until 24.4 is released (for compatibility and Bugfix validation run)
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb || true
dpkg -i package_folder/clickhouse-library-bridge_*.deb || true
dpkg -i package_folder/clickhouse-odbc-bridge_*.deb
dpkg -i package_folder/clickhouse-library-bridge_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
@ -58,12 +57,6 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
function remove_keeper_config()
{
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml


@ -1170,6 +1170,10 @@ Data in the VALUES clause of INSERT queries is processed by a separate stream pa
Default value: 262144 (= 256 KiB).
:::note
`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed.
:::
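A minimal sketch of the workaround implied by the note above. The assumption here is that `SET` applies the value at the session level before the next statement is parsed, so the enlarged buffer is available to subsequent queries (unlike a per-query `SETTINGS` clause):

```sql
-- Sketch: enlarge the parse buffer for the rest of the session, then run the query.
SET max_query_size = 524288;  -- 512 KiB
SELECT now();
```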
## max_parser_depth {#max_parser_depth}
Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size.
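As a hedged illustration of the setting described above (the exact depth at which parsing fails depends on the build and on the query shape), a deeply nested expression can be allowed by raising the limit for the session:

```sql
-- Sketch: permit deeper recursion in the parser for subsequent statements.
SET max_parser_depth = 10000;
SELECT ((((((((1))))))));  -- trivially nested; real queries may nest far deeper
```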


@ -5,23 +5,45 @@ sidebar_position: 165
# maxMap
Syntax: `maxMap(key, value)` or `maxMap(Tuple(key, value))`
Calculates the maximum from `value` array according to the keys specified in the `key` array.
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
**Syntax**
The number of elements in `key` and `value` must be the same for each row that is totaled.
```sql
maxMap(key, value)
```
or
```sql
maxMap(Tuple(key, value))
```
Returns a tuple of two arrays: keys and values calculated for the corresponding keys.
Alias: `maxMappedArrays`
Example:
:::note
- Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
- The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
**Parameters**
- `key` — Array of keys. [Array](../../data-types/array.md).
- `value` — Array of values. [Array](../../data-types/array.md).
**Returned value**
- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).
**Example**
Query:
``` sql
SELECT maxMap(a, b)
FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1]))
```
Result:
``` text
┌─maxMap(a, b)───────────┐
│ [['x','y','z'],[2,3,1]]│


@ -5,23 +5,45 @@ sidebar_position: 169
# minMap
Syntax: `minMap(key, value)` or `minMap(Tuple(key, value))`
Calculates the minimum from `value` array according to the keys specified in the `key` array.
Passing a tuple of keys and value arrays is identical to passing two arrays of keys and values.
**Syntax**
The number of elements in `key` and `value` must be the same for each row that is totaled.
```sql
minMap(key, value)
```
or
```sql
minMap(Tuple(key, value))
```
Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys.
Alias: `minMappedArrays`
Example:
:::note
- Passing a tuple of keys and value arrays is identical to passing an array of keys and an array of values.
- The number of elements in `key` and `value` must be the same for each row that is totaled.
:::
**Parameters**
- `key` — Array of keys. [Array](../../data-types/array.md).
- `value` — Array of values. [Array](../../data-types/array.md).
**Returned value**
- Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. [Tuple](../../data-types/tuple.md)([Array](../../data-types/array.md), [Array](../../data-types/array.md)).
**Example**
Query:
``` sql
SELECT minMap(a, b)
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
```
Result:
``` text
┌─minMap(a, b)──────┐
│ ([1,2,3],[2,1,1]) │


@ -12,9 +12,7 @@ Functions for [searching](string-search-functions.md) in strings and for [replac
## empty
Checks whether the input string is empty.
A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
Checks whether the input string is empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty).
@ -48,9 +46,7 @@ Result:
## notEmpty
Checks whether the input string is non-empty.
A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
Checks whether the input string is non-empty. A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte.
The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty).
@ -96,7 +92,7 @@ length(s)
**Parameters**
- `s`: An input string or array. [String](../data-types/string)/[Array](../data-types/array).
- `s` An input string or array. [String](../data-types/string)/[Array](../data-types/array).
**Returned value**
@ -149,7 +145,7 @@ lengthUTF8(s)
**Parameters**
- `s`: String containing valid UTF-8 encoded text. [String](../data-types/string).
- `s` String containing valid UTF-8 encoded text. [String](../data-types/string).
**Returned value**
@ -183,8 +179,8 @@ left(s, offset)
**Parameters**
- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -230,8 +226,8 @@ leftUTF8(s, offset)
**Parameters**
- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -347,8 +343,8 @@ right(s, offset)
**Parameters**
- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -394,8 +390,8 @@ rightUTF8(s, offset)
**Parameters**
- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint).
- `s` The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `offset` The number of bytes of the offset. [UInt*](../data-types/int-uint).
**Returned value**
@ -547,7 +543,7 @@ Alias: `ucase`
**Parameters**
- `input`: A string type [String](../data-types/string.md).
- `input` A string type [String](../data-types/string.md).
**Returned value**
@ -571,16 +567,47 @@ SELECT upper('clickhouse');
Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
:::note
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point.
:::
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
**Syntax**
```sql
lowerUTF8(input)
```
**Parameters**
- `input` — A string type [String](../data-types/string.md).
**Returned value**
- A [String](../data-types/string.md) data type value.
**Example**
Query:
``` sql
SELECT lowerUTF8('MÜNCHEN') as Lowerutf8;
```
Result:
``` response
┌─Lowerutf8─┐
│ münchen │
└───────────┘
```
## upperUTF8
Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
:::note
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). If the length of the UTF-8 byte sequence is different for upper and lower case of a code point (such as `ẞ` and `ß`), the result may be incorrect for this code point.
:::
**Syntax**
@ -590,7 +617,7 @@ upperUTF8(input)
**Parameters**
- `input`: A string type [String](../data-types/string.md).
- `input` A string type [String](../data-types/string.md).
**Returned value**
@ -604,6 +631,8 @@ Query:
SELECT upperUTF8('München') as Upperutf8;
```
Result:
``` response
┌─Upperutf8─┐
│ MÜNCHEN │
@ -614,6 +643,34 @@ SELECT upperUTF8('München') as Upperutf8;
Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0.
**Syntax**
``` sql
isValidUTF8(input)
```
**Parameters**
- `input` — A string type [String](../data-types/string.md).
**Returned value**
- Returns `1`, if the set of bytes constitutes valid UTF-8-encoded text, otherwise `0`.
Query:
``` sql
SELECT isValidUTF8('\xc3\xb1') AS valid, isValidUTF8('\xc3\x28') AS invalid;
```
Result:
``` response
┌─valid─┬─invalid─┐
│ 1 │ 0 │
└───────┴─────────┘
```
## toValidUTF8
Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character.
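A short illustrative query of the collapsing behaviour described above (a sketch; the exact rendering of the output may vary): the four invalid bytes `\xF0\x80\x80\x80` should collapse into a single replacement character, yielding `a�b`.

```sql
SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') AS repaired;  -- expected: 'a�b'
```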
@ -883,7 +940,7 @@ Returns the substring of a string `s` which starts at the specified byte index `
substring(s, offset[, length])
```
Alias:
Aliases:
- `substr`
- `mid`
- `byteSlice`
@ -926,9 +983,9 @@ substringUTF8(s, offset[, length])
**Arguments**
- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md)
- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md).
- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional.
- `s` The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md)
- `offset` The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md).
- `length` The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional.
**Returned value**
@ -964,9 +1021,9 @@ Alias: `SUBSTRING_INDEX`
**Arguments**
- s: The string to extract substring from. [String](../data-types/string.md).
- delim: The character to split. [String](../data-types/string.md).
- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
- s The string to extract substring from. [String](../data-types/string.md).
- delim The character to split. [String](../data-types/string.md).
- count The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Example**
@ -995,9 +1052,9 @@ substringIndexUTF8(s, delim, count)
**Arguments**
- `s`: The string to extract substring from. [String](../data-types/string.md).
- `delim`: The character to split. [String](../data-types/string.md).
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
- `s` The string to extract substring from. [String](../data-types/string.md).
- `delim` The character to split. [String](../data-types/string.md).
- `count` The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Returned value**
@ -1277,7 +1334,7 @@ tryBase64Decode(encoded)
**Arguments**
- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
- `encoded` [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value, returns an empty string.
**Returned value**
@ -1309,7 +1366,7 @@ tryBase64URLDecode(encodedUrl)
**Parameters**
- `encodedURL`: [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
- `encodedURL` [String](../data-types/string.md) column or constant. If the string is not a valid Base64-encoded value with URL-specific modifications, returns an empty string.
**Returned value**
@ -1555,7 +1612,7 @@ The result type is UInt64.
## normalizeQuery
Replaces literals, sequences of literals and complex aliases with placeholders.
Replaces literals, sequences of literals and complex aliases (containing whitespace, more than two digits or at least 36 bytes long such as UUIDs) with placeholder `?`.
**Syntax**
@ -1573,6 +1630,8 @@ normalizeQuery(x)
**Example**
Query:
``` sql
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
```
@ -1585,9 +1644,44 @@ Result:
└──────────┘
```
## normalizeQueryKeepNames
Replaces literals, sequences of literals with placeholder `?` but does not replace complex aliases (containing whitespace, more than two digits
or at least 36 bytes long such as UUIDs). This helps better analyze complex query logs.
**Syntax**
``` sql
normalizeQueryKeepNames(x)
```
**Arguments**
- `x` — Sequence of characters. [String](../data-types/string.md).
**Returned value**
- Sequence of characters with placeholders. [String](../data-types/string.md).
**Example**
Query:
``` sql
SELECT normalizeQuery('SELECT 1 AS aComplexName123'), normalizeQueryKeepNames('SELECT 1 AS aComplexName123');
```
Result:
```result
┌─normalizeQuery('SELECT 1 AS aComplexName123')─┬─normalizeQueryKeepNames('SELECT 1 AS aComplexName123')─┐
│ SELECT ? AS `?` │ SELECT ? AS aComplexName123 │
└───────────────────────────────────────────────┴────────────────────────────────────────────────────────┘
```
## normalizedQueryHash
Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log.
Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query logs.
**Syntax**
@ -1605,6 +1699,8 @@ normalizedQueryHash(x)
**Example**
Query:
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
```
@ -1617,6 +1713,43 @@ Result:
└─────┘
```
## normalizedQueryHashKeepNames
Like [normalizedQueryHash](#normalizedqueryhash) it returns identical 64bit hash values without the values of literals for similar queries but it does not replace complex aliases (containing whitespace, more than two digits
or at least 36 bytes long such as UUIDs) with a placeholder before hashing. Can be helpful to analyze query logs.
**Syntax**
``` sql
normalizedQueryHashKeepNames(x)
```
**Arguments**
- `x` — Sequence of characters. [String](../data-types/string.md).
**Returned value**
- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges).
**Example**
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz123`') != normalizedQueryHash('SELECT 1 AS `abc123`') AS normalizedQueryHash;
SELECT normalizedQueryHashKeepNames('SELECT 1 AS `xyz123`') != normalizedQueryHashKeepNames('SELECT 1 AS `abc123`') AS normalizedQueryHashKeepNames;
```
Result:
```result
┌─normalizedQueryHash─┐
│ 0 │
└─────────────────────┘
┌─normalizedQueryHashKeepNames─┐
│ 1 │
└──────────────────────────────┘
```
## normalizeUTF8NFC
Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text.
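A minimal sketch of the composition behaviour, assuming standard Unicode NFC semantics: `A` followed by U+030A (combining ring above, bytes `CC 8A`) composes to the single code point `Å` (U+00C5, bytes `C3 85`).

```sql
SELECT normalizeUTF8NFC('A\xCC\x8A') = '\xC3\x85' AS composed;  -- should return 1
```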
@ -1935,7 +2068,7 @@ soundex(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` Input value. [String](../data-types/string.md)
**Returned value**
@ -1968,7 +2101,7 @@ punycodeEncode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` Input value. [String](../data-types/string.md)
**Returned value**
@ -2001,7 +2134,7 @@ punycodeEncode(val)
**Arguments**
- `val` - Punycode-encoded string. [String](../data-types/string.md)
- `val` Punycode-encoded string. [String](../data-types/string.md)
**Returned value**
@ -2027,7 +2160,7 @@ Like `punycodeDecode` but returns an empty string if no valid Punycode-encoded s
## idnaEncode
Returns the the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
Returns the ASCII representation (ToASCII algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
The input string must be UTF-encoded and translatable to an ASCII string, otherwise an exception is thrown.
Note: No percent decoding or trimming of tabs, spaces or control characters is performed.
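A small, hedged example of the ToASCII conversion: non-ASCII labels are turned into their Punycode (`xn--`) form; the value in the comment is the well-known IDN encoding of this domain.

```sql
SELECT idnaEncode('münchen.de') AS ascii_form;  -- expected: 'xn--mnchen-3ya.de'
```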
@ -2039,7 +2172,7 @@ idnaEncode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` Input value. [String](../data-types/string.md)
**Returned value**
@ -2065,7 +2198,7 @@ Like `idnaEncode` but returns an empty string in case of an error instead of thr
## idnaDecode
Returns the the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
Returns the Unicode (UTF-8) representation (ToUnicode algorithm) of a domain name according to the [Internationalized Domain Names in Applications](https://en.wikipedia.org/wiki/Internationalized_domain_name#Internationalizing_Domain_Names_in_Applications) (IDNA) mechanism.
In case of an error (e.g. because the input is invalid), the input string is returned.
Note that repeated application of `idnaEncode()` and `idnaDecode()` does not necessarily return the original string due to case normalization.
@ -2077,7 +2210,7 @@ idnaDecode(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` Input value. [String](../data-types/string.md)
**Returned value**
@ -2121,7 +2254,7 @@ Result:
└───────────────────────────────────────────┘
```
Alias: mismatches
Alias: `mismatches`
## stringJaccardIndex
@ -2175,7 +2308,7 @@ Result:
└─────────────────────────────────────┘
```
Alias: levenshteinDistance
Alias: `levenshteinDistance`
## editDistanceUTF8
@ -2201,7 +2334,7 @@ Result:
└─────────────────────────────────────┘
```
Alias: levenshteinDistanceUTF8
Alias: `levenshteinDistanceUTF8`
## damerauLevenshteinDistance
@ -2279,13 +2412,93 @@ Result:
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
:::note
Because `initCap` converts only the first letter of each word to upper case you may observe unexpected behaviour for words containing apostrophes or capital letters. For example:
```sql
SELECT initCap('mother''s daughter'), initCap('joe McAdam');
```
will return
```response
┌─initCap('mother\'s daughter')─┬─initCap('joe McAdam')─┐
│ Mother'S Daughter │ Joe Mcadam │
└───────────────────────────────┴───────────────────────┘
```
This is a known behaviour, with no plans currently to fix it.
:::
**Syntax**
```sql
initcap(val)
```
**Arguments**
- `val` — Input value. [String](../data-types/string.md).
**Returned value**
- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT initcap('building for fast');
```
Result:
```text
┌─initcap('building for fast')─┐
│ Building For Fast │
└──────────────────────────────┘
```
## initcapUTF8
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
Like [initcap](#initcap), `initcapUTF8` converts the first letter of each word to upper case and the rest to lower case. Assumes that the string contains valid UTF-8 encoded text.
If this assumption is violated, no exception is thrown and the result is undefined.
:::note
This function does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
:::
**Syntax**
```sql
initcapUTF8(val)
```
**Arguments**
- `val` — Input value. [String](../data-types/string.md).
**Returned value**
- `val` with the first letter of each word converted to upper case. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT initcapUTF8('не тормозит');
```
Result:
```text
┌─initcapUTF8('не тормозит')─┐
│ Не Тормозит │
└────────────────────────────┘
```
## firstLine
@ -2299,7 +2512,7 @@ firstLine(val)
**Arguments**
- `val` - Input value. [String](../data-types/string.md)
- `val` Input value. [String](../data-types/string.md)
**Returned value**


@ -34,7 +34,7 @@ Alias: `replace`.
Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string.
`replacement` can containing substitutions `\0-\9`.
`replacement` can contain substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`.
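A small sketch of the substitution syntax described above; the doubled backslashes escape `\` inside the SQL string literal, so `\\1` and `\\2` reach the replacement engine as `\1` and `\2`.

```sql
SELECT replaceRegexpOne('Hello, World', '(\\w+), (\\w+)', '\\2 \\1') AS swapped;  -- 'World Hello'
```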


@ -33,7 +33,7 @@ public:
return;
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction())
if (!function_node || !function_node->isAggregateFunction() || !function_node->getResultType()->equals(DataTypeUInt64()))
return;
auto function_name = function_node->getFunctionName();


@ -187,9 +187,9 @@ void HedgedConnections::sendQuery(
modified_settings.group_by_two_level_threshold_bytes = 0;
}
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas();
if (offset_states.size() > 1 && enable_sample_offset_parallel_processing)
if (offset_states.size() > 1 && enable_offset_parallel_processing)
{
modified_settings.parallel_replicas_count = offset_states.size();
modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset;
@ -201,7 +201,8 @@ void HedgedConnections::sendQuery(
/// all servers involved in the distributed query processing.
modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
replica.connection->sendQuery(timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {});
replica.connection->sendQuery(
timeouts, query, /* query_parameters */ {}, query_id, stage, &modified_settings, &client_info, with_pending_data, {});
replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout);
replica.packet_receiver->setTimeout(hedged_connections_factory.getConnectionTimeouts().receive_timeout);
};


@ -2,6 +2,7 @@
#include <Common/thread_local_rng.h>
#include <Core/Protocol.h>
#include <Interpreters/Context.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/Operators.h>
#include <Interpreters/ClientInfo.h>
@ -23,8 +24,8 @@ namespace ErrorCodes
}
MultiplexedConnections::MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
MultiplexedConnections::MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler)
: context(std::move(context_)), settings(context->getSettingsRef())
{
connection.setThrottler(throttler);
@ -36,9 +37,9 @@ MultiplexedConnections::MultiplexedConnections(Connection & connection, const Se
}
MultiplexedConnections::MultiplexedConnections(std::shared_ptr<Connection> connection_ptr_, const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
, connection_ptr(connection_ptr_)
MultiplexedConnections::MultiplexedConnections(
std::shared_ptr<Connection> connection_ptr_, ContextPtr context_, const ThrottlerPtr & throttler)
: context(std::move(context_)), settings(context->getSettingsRef()), connection_ptr(connection_ptr_)
{
connection_ptr->setThrottler(throttler);
@ -50,9 +51,8 @@ MultiplexedConnections::MultiplexedConnections(std::shared_ptr<Connection> conne
}
MultiplexedConnections::MultiplexedConnections(
std::vector<IConnectionPool::Entry> && connections,
const Settings & settings_, const ThrottlerPtr & throttler)
: settings(settings_)
std::vector<IConnectionPool::Entry> && connections, ContextPtr context_, const ThrottlerPtr & throttler)
: context(std::move(context_)), settings(context->getSettingsRef())
{
/// If we didn't get any connections from pool and getMany() did not throw exceptions, this means that
/// `skip_unavailable_shards` was set. Then just return.
@ -156,18 +156,18 @@ void MultiplexedConnections::sendQuery(
/// all servers involved in the distributed query processing.
modified_settings.set("allow_experimental_analyzer", static_cast<bool>(modified_settings.allow_experimental_analyzer));
const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0;
const bool enable_offset_parallel_processing = context->canUseOffsetParallelReplicas();
size_t num_replicas = replica_states.size();
if (num_replicas > 1)
{
if (enable_sample_offset_parallel_processing)
if (enable_offset_parallel_processing)
/// Use multiple replicas for parallel query processing.
modified_settings.parallel_replicas_count = num_replicas;
for (size_t i = 0; i < num_replicas; ++i)
{
if (enable_sample_offset_parallel_processing)
if (enable_offset_parallel_processing)
modified_settings.parallel_replica_offset = i;
replica_states[i].connection->sendQuery(


@ -10,7 +10,6 @@
namespace DB
{
/** To retrieve data directly from multiple replicas (connections) from one shard
* within a single thread. As a degenerate case, it can also work with one connection.
* It is assumed that all functions except sendCancel are always executed in one thread.
@ -21,14 +20,12 @@ class MultiplexedConnections final : public IConnections
{
public:
/// Accepts ready connection.
MultiplexedConnections(Connection & connection, const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(Connection & connection, ContextPtr context_, const ThrottlerPtr & throttler_);
/// Accepts ready connection and keep it alive before drain
MultiplexedConnections(std::shared_ptr<Connection> connection_, const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(std::shared_ptr<Connection> connection_, ContextPtr context_, const ThrottlerPtr & throttler_);
/// Accepts a vector of connections to replicas of one shard already taken from pool.
MultiplexedConnections(
std::vector<IConnectionPool::Entry> && connections,
const Settings & settings_, const ThrottlerPtr & throttler_);
MultiplexedConnections(std::vector<IConnectionPool::Entry> && connections, ContextPtr context_, const ThrottlerPtr & throttler_);
void sendScalarsData(Scalars & data) override;
void sendExternalTablesData(std::vector<ExternalTablesData> & data) override;
@ -86,6 +83,7 @@ private:
/// Mark the replica as invalid.
void invalidateReplica(ReplicaState & replica_state);
ContextPtr context;
const Settings & settings;
/// The current number of valid connections to the replicas of this shard.


@ -31,8 +31,10 @@ target_link_libraries (arena_with_free_lists PRIVATE dbms)
clickhouse_add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
target_link_libraries (lru_hash_map_perf PRIVATE dbms)
clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
if (OS_LINUX)
clickhouse_add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
endif()
clickhouse_add_executable (array_cache array_cache.cpp)
target_link_libraries (array_cache PRIVATE clickhouse_common_io)


@ -167,9 +167,6 @@ class IColumn;
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
\
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
\
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
M(Bool, alter_move_to_space_execute_async, false, "Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously", 0) \
@ -205,21 +202,6 @@ class IColumn;
M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \
\
M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \
M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \
M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \
M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \
\
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
\
M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards. Shard is marked as unavailable when: 1) The shard cannot be reached due to a connection failure. 2) Shard is unresolvable through DNS. 3) Table does not exist on the shard.", 0) \
\
@ -251,8 +233,6 @@ class IColumn;
M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \
M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \
M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
\
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \
@ -341,7 +321,6 @@ class IColumn;
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
\
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
\
M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
@ -392,7 +371,6 @@ class IColumn;
M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \
M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \
M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
M(Bool, enable_deflate_qpl_codec, false, "Enable/disable the DEFLATE_QPL codec.", 0) \
M(Bool, enable_zstd_qat_codec, false, "Enable/disable the ZSTD_QAT codec.", 0) \
M(UInt64, query_profiler_real_time_period_ns, QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS, "Period for real clock timer of query profiler (in nanoseconds). Set 0 value to turn off the real clock query profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
@ -402,8 +380,7 @@ class IColumn;
M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \
M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \
M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \
M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer.", IMPORTANT) \
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
\
M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \
M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
\
@ -553,6 +530,7 @@ class IColumn;
M(Bool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \
M(Bool, optimize_read_in_window_order, true, "Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.", 0) \
M(Bool, optimize_aggregation_in_order, false, "Enable GROUP BY optimization for aggregating data in corresponding order in MergeTree tables.", 0) \
M(Bool, read_in_order_use_buffering, true, "Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution", 0) \
M(UInt64, aggregation_in_order_max_block_bytes, 50000000, "Maximal size of block in bytes accumulated during aggregation in order of primary key. Lower block size allows to parallelize more final merge stage of aggregation.", 0) \
M(UInt64, read_in_order_two_level_merge_threshold, 100, "Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.", 0) \
M(Bool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \
@ -593,13 +571,6 @@ class IColumn;
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \
M(UInt64, distributed_replica_max_ignored_errors, 0, "Number of errors that will be ignored while choosing replicas", 0) \
\
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \
M(Seconds, window_view_clean_interval, 60, "The clean interval of window view in seconds to free outdated data.", 0) \
M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \
M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \
M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \
\
M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \
@ -639,8 +610,6 @@ class IColumn;
M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
@ -657,8 +626,6 @@ class IColumn;
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
M(Bool, cast_ipv4_ipv6_default_on_conversion_error, false, "CAST operator into IPv4, CAST operator into IPV6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.", 0) \
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "When querying system.events or system.metrics tables, include all metrics, even with zero values.", 0) \
M(MySQLDataTypesSupport, mysql_datatypes_support_level, MySQLDataTypesSupportList{}, "Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. decimal: convert NUMERIC and DECIMAL types to Decimal when precision allows it. datetime64: convert DATETIME and TIMESTAMP types to DateTime64 instead of DateTime when precision is not 0. date2Date32: convert DATE to Date32 instead of Date. Takes precedence over date2String. date2String: convert DATE to String instead of Date. Overridden by datetime64.", 0) \
M(Bool, optimize_trivial_insert_select, false, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \
@ -722,9 +689,6 @@ class IColumn;
M(Bool, force_aggregate_partitions_independently, false, "Force the use of optimization when it is applicable, but heuristics decided not to use it", 0) \
M(UInt64, max_number_of_partitions_for_independent_aggregation, 128, "Maximal number of partitions in table to apply optimization", 0) \
M(Float, min_hit_rate_to_use_consecutive_keys_optimization, 0.5, "Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled", 0) \
/** Experimental feature for moving data between shards. */ \
\
M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
\
M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
@ -906,34 +870,11 @@ class IColumn;
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \
M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \
M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \
M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \
M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \
M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0)\
M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \
M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \
M(SQLSecurityType, default_materialized_view_sql_security, SQLSecurityType::DEFINER, "Allows to set a default value for SQL SECURITY option when creating a materialized view.", 0) \
M(String, default_view_definer, "CURRENT_USER", "Allows to set a default value for DEFINER option when creating view.", 0) \
@ -943,6 +884,81 @@ class IColumn;
M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \
M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \
M(Bool, allow_deprecated_snowflake_conversion_functions, false, "Enables deprecated functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \
M(Bool, optimize_distinct_in_order, true, "Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement", 0) \
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on insert for an already existing key", 0) \
M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
\
\
/* ###################################### */ \
/* ######## EXPERIMENTAL FEATURES ####### */ \
/* ###################################### */ \
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \
M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \
M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \
M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \
M(UInt64, max_threads_for_annoy_index_creation, 4, "Number of threads used to build Annoy indexes (0 means all cores, not recommended)", 0) \
M(Int64, annoy_index_search_k_nodes, -1, "SELECT queries search up to this many nodes in Annoy indexes.", 0) \
M(Bool, throw_on_unsupported_query_inside_transaction, true, "Throw exception if unsupported query is used inside transaction", 0) \
M(TransactionsWaitCSNMode, wait_changes_become_visible_after_commit_mode, TransactionsWaitCSNMode::WAIT_UNKNOWN, "Wait for committed changes to become actually visible in the latest snapshot", 0) \
M(Bool, implicit_transaction, false, "If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)", 0) \
M(UInt64, grace_hash_join_initial_buckets, 1, "Initial number of grace hash join buckets", 0) \
M(UInt64, grace_hash_join_max_buckets, 1024, "Limit on the number of grace hash join buckets", 0) \
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
\
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
\
/* Parallel replicas */ \
M(UInt64, parallel_replicas_count, 0, "This is an internal setting that should not be used directly; it represents an implementation detail of the 'parallel replicas' mode. It is set automatically by the initiator server of a distributed query to the number of parallel replicas participating in query processing.", 0) \
M(UInt64, parallel_replica_offset, 0, "This is an internal setting that should not be used directly; it represents an implementation detail of the 'parallel replicas' mode. It is set automatically by the initiator server of a distributed query to the index of the replica participating in query processing among the parallel replicas.", 0) \
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \
M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
M(Bool, parallel_replicas_allow_in_with_subquery, true, "If true, subquery for IN will be executed on every follower replica.", 0) \
M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier applied when calculating the minimal number of marks to retrieve from the coordinator. Applied only to remote replicas.", 0) \
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts are virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure of what you're doing", 0) \
\
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
\
M(Bool, allow_experimental_join_condition, false, "Support join with inequality conditions which involve columns from both left and right table, e.g. t1.y < t2.y.", 0) \
\
/* Analyzer: It's not experimental anymore (WIP) */ \
M(Bool, allow_experimental_analyzer, true, "Allow new query analyzer.", IMPORTANT) \
M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).", 0) \
\
M(Bool, allow_experimental_live_view, false, "Enable LIVE VIEW. Not mature enough.", 0) \
M(Seconds, live_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate live query is alive.", 0) \
M(UInt64, max_live_view_insert_blocks_before_refresh, 64, "Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.", 0) \
\
M(Bool, allow_experimental_window_view, false, "Enable WINDOW VIEW. Not mature enough.", 0) \
M(Seconds, window_view_clean_interval, 60, "The interval in seconds at which the window view is cleaned to free outdated data.", 0) \
M(Seconds, window_view_heartbeat_interval, 15, "The heartbeat interval in seconds to indicate watch query is alive.", 0) \
M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \
\
M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW <name> REFRESH ...).", 0) \
M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW <name> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
\
M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \
M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \
\
/** Experimental feature for moving data between shards. */ \
M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
/** End of experimental features */
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS.
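For context on the grace hash join settings above, a minimal SQL sketch of how they are typically applied at query level; the tables t1/t2 and the key k are hypothetical, and routing the join through join_algorithm = 'grace_hash' is an assumption about how these bucket settings take effect:

    SET join_algorithm = 'grace_hash';
    SELECT count()
    FROM t1
    INNER JOIN t2 ON t1.k = t2.k
    SETTINGS grace_hash_join_initial_buckets = 4, grace_hash_join_max_buckets = 1024;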
@ -1050,6 +1066,7 @@ class IColumn;
M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \
M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \
M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether to use the writer's time zone from the ORC stripe for the ORC row reader; the default ORC row reader time zone is GMT.", 0) \
M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \
M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \
M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \
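As a usage sketch for the ORC input settings in the hunk above (the file path and column names are hypothetical): with allow_missing_columns enabled, requested columns absent from the file are filled with defaults, and filter_push_down lets whole stripes or row groups be skipped based on the ORC metadata:

    SELECT user_id, event_time
    FROM file('events.orc', 'ORC', 'user_id UInt64, event_time DateTime, extra String')
    WHERE user_id = 42
    SETTINGS input_format_orc_allow_missing_columns = 1,
             input_format_orc_filter_push_down = 1;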

View File

@ -58,11 +58,13 @@ String ClickHouseVersion::toString() const
static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory::SettingsChanges>> settings_changes_history_initializer =
{
{"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."},
{"read_in_order_use_buffering", false, true, "Use buffering before merging while reading in order of primary key"},
{"optimize_functions_to_subcolumns", false, true, "Enable optimization by default"},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"collect_hash_table_stats_during_joins", false, true, "New setting."},
{"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."},
{"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."},
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
{"query_plan_merge_filters", false, false, "Allow to merge filters in the query plan"},

View File

@ -716,6 +716,16 @@ static void writeFieldsToColumn(
null_map_column->insertValue(0);
}
else
{
// Column is not null but field is null. It's possible due to overrides
if (field.isNull())
{
column_to.insertDefault();
return false;
}
}
return true;
};
@ -791,7 +801,7 @@ static void writeFieldsToColumn(
if (write_data_to_null_map(value, index))
{
const String & data = value.get<const String &>();
const String & data = value.safeGet<const String &>();
casted_string_column->insertData(data.data(), data.size());
}
}

View File

@ -243,6 +243,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
format_settings.orc.read_use_writer_time_zone = settings.input_format_orc_read_use_writer_time_zone;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.capn_proto.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference;
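A brief usage sketch for the new ORC option wired through here; 'data.orc' is a hypothetical file whose stripes were written in a non-GMT time zone:

    SELECT *
    FROM file('data.orc', 'ORC')
    SETTINGS input_format_orc_read_use_writer_time_zone = 1;  -- interpret timestamps in the writer's time zone instead of GMT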

View File

@ -403,6 +403,7 @@ struct FormatSettings
bool use_fast_decoder = true;
bool filter_push_down = true;
UInt64 output_row_index_stride = 10'000;
bool read_use_writer_time_zone = false;
} orc{};
/// For capnProto format we should determine how to

View File

@ -1,9 +1,12 @@
#pragma once
#include <base/types.h>
#include <Columns/ColumnString.h>
#include <Common/OptimizedRegularExpression.h>
#include <Common/re2.h>
#include <Functions/Regexps.h>
#include <Functions/ReplaceStringImpl.h>
#include <IO/WriteHelpers.h>
#include <base/types.h>
namespace DB
{
@ -48,45 +51,75 @@ struct ReplaceRegexpImpl
static constexpr int max_captures = 10;
static Instructions createInstructions(std::string_view replacement, int num_captures)
/// References in the replacement string must not point to non-existent capturing groups.
static void checkSubstitutions(std::string_view replacement, int num_captures)
{
Instructions instructions;
String literals;
for (size_t i = 0; i < replacement.size(); ++i)
{
if (replacement[i] == '\\' && i + 1 < replacement.size())
{
if (isNumericASCII(replacement[i + 1])) /// Substitution
if (isNumericASCII(replacement[i + 1])) /// substitution
{
int substitution_num = replacement[i + 1] - '0';
if (substitution_num >= num_captures)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1);
}
}
}
}
static Instructions createInstructions(std::string_view replacement, int num_captures)
{
checkSubstitutions(replacement, num_captures);
Instructions instructions;
String literals;
literals.reserve(replacement.size());
for (size_t i = 0; i < replacement.size(); ++i)
{
if (replacement[i] == '\\' && i + 1 < replacement.size())
{
if (isNumericASCII(replacement[i + 1])) /// substitution
{
if (!literals.empty())
{
instructions.emplace_back(literals);
literals = "";
}
instructions.emplace_back(replacement[i + 1] - '0');
int substitution_num = replacement[i + 1] - '0';
instructions.emplace_back(substitution_num);
}
else
literals += replacement[i + 1]; /// Escaping
literals += replacement[i + 1]; /// escaping
++i;
}
else
literals += replacement[i]; /// Plain character
literals += replacement[i]; /// plain character
}
if (!literals.empty())
instructions.emplace_back(literals);
for (const auto & instr : instructions)
if (instr.substitution_num >= num_captures)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Id {} in replacement string is an invalid substitution, regexp has only {} capturing groups",
instr.substitution_num, num_captures - 1);
return instructions;
}
static bool canFallbackToStringReplacement(const String & needle, const String & replacement, const re2::RE2 & searcher, int num_captures)
{
if (searcher.NumberOfCapturingGroups())
return false;
checkSubstitutions(replacement, num_captures);
String required_substring;
bool is_trivial;
bool required_substring_is_prefix;
std::vector<String> alternatives;
OptimizedRegularExpression::analyze(needle, required_substring, is_trivial, required_substring_is_prefix, alternatives);
return is_trivial && required_substring_is_prefix && required_substring == needle;
}
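Two hedged SQL sketches of the behaviour implemented above: a needle without capturing groups or regexp metacharacters should be eligible for the plain string-replacement fast path, while a replacement referencing a non-existent capturing group is now rejected up front:

    -- trivial needle, no capturing groups: may take the string-replacement fast path
    SELECT replaceRegexpAll('clickhouse cloud', 'cloud', 'Cloud');

    -- '\2' points past the single capturing group: throws BAD_ARGUMENTS
    SELECT replaceRegexpOne('ab', '(a)b', '\\2');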
static void processString(
const char * haystack_data,
size_t haystack_length,
@ -124,7 +157,7 @@ struct ReplaceRegexpImpl
{
std::string_view replacement;
if (instr.substitution_num >= 0)
replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size());
replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()};
else
replacement = instr.literal;
res_data.resize(res_data.size() + replacement.size());
@ -179,19 +212,32 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
/// Try to use non-regexp string replacement. This shortcut is implemented only for const-needles + const-replacement as
/// pattern analysis incurs some cost too.
if (canFallbackToStringReplacement(needle, replacement, searcher, num_captures))
{
auto convertTrait = [](ReplaceRegexpTraits::Replace first_or_all)
{
switch (first_or_all)
{
case ReplaceRegexpTraits::Replace::First: return ReplaceStringTraits::Replace::First;
case ReplaceRegexpTraits::Replace::All: return ReplaceStringTraits::Replace::All;
}
};
ReplaceStringImpl<Name, convertTrait(replace)>::vectorConstantConstant(haystack_data, haystack_offsets, needle, replacement, res_data, res_offsets);
return;
}
Instructions instructions = createInstructions(replacement, num_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -221,10 +267,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -242,6 +286,7 @@ struct ReplaceRegexpImpl
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
@ -270,17 +315,14 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -290,8 +332,9 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -317,10 +360,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -338,12 +379,14 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -367,16 +410,13 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)

View File

@ -713,8 +713,12 @@ ReadWriteBufferFromHTTP::HTTPFileInfo ReadWriteBufferFromHTTP::getFileInfo()
/// fall back to slow whole-file reads when HEAD is actually supported; that sounds
/// like a nightmare to debug.)
if (e.getHTTPStatus() >= 400 && e.getHTTPStatus() <= 499 &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS)
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_TOO_MANY_REQUESTS &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_REQUEST_TIMEOUT &&
e.getHTTPStatus() != Poco::Net::HTTPResponse::HTTP_MISDIRECTED_REQUEST)
{
return HTTPFileInfo{};
}
throw;
}

View File

@ -59,10 +59,10 @@ clickhouse_add_executable (parse_date_time_best_effort parse_date_time_best_effo
target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io)
clickhouse_add_executable (zlib_ng_bug zlib_ng_bug.cpp)
target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib)
target_link_libraries (zlib_ng_bug PRIVATE ch_contrib::zlib clickhouse_common_io)
clickhouse_add_executable (dragonbox_test dragonbox_test.cpp)
target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars)
target_link_libraries (dragonbox_test PRIVATE ch_contrib::dragonbox_to_chars clickhouse_common_io)
clickhouse_add_executable (zstd_buffers zstd_buffers.cpp)
target_link_libraries (zstd_buffers PRIVATE clickhouse_common_io)

View File

@ -29,33 +29,12 @@ BlockIO InterpreterSetRoleQuery::execute()
void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query)
{
auto & access_control = getContext()->getAccessControl();
auto session_context = getContext()->getSessionContext();
auto user = session_context->getUser();
if (query.kind == ASTSetRoleQuery::Kind::SET_ROLE_DEFAULT)
{
session_context->setCurrentRolesDefault();
}
else
{
RolesOrUsersSet roles_from_query{*query.roles, access_control};
std::vector<UUID> new_current_roles;
if (roles_from_query.all)
{
new_current_roles = user->granted_roles.findGranted(roles_from_query);
}
else
{
for (const auto & id : roles_from_query.getMatchingIDs())
{
if (!user->granted_roles.isGranted(id))
throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set current");
new_current_roles.emplace_back(id);
}
}
session_context->setCurrentRoles(new_current_roles);
}
session_context->setCurrentRoles(RolesOrUsersSet{*query.roles, session_context->getAccessControl()});
}
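The simplification above leans on Context::setCurrentRoles performing the grant check; a short SQL sketch (the role names are hypothetical):

    SET ROLE granted_role;      -- accepted: the role is granted to the current user
    SET ROLE DEFAULT;           -- back to the user's default roles
    SET ROLE not_granted_role;  -- rejected with SET_NON_GRANTED_ROLE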

View File

@ -726,7 +726,10 @@ try
/// Access rights must be checked for the user who executed the initial INSERT query.
if (key.user_id)
insert_context->setUser(*key.user_id, key.current_roles);
{
insert_context->setUser(*key.user_id);
insert_context->setCurrentRoles(key.current_roles);
}
insert_context->setSettings(key.settings);

View File

@ -8,23 +8,28 @@
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTFunction.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/queryToString.h>
#include <Planner/Utils.h>
#include <Processors/QueryPlan/DistributedCreateLocalPlan.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/ReadFromRemote.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/DistributedCreateLocalPlan.h>
#include <Processors/ResizeProcessor.h>
#include <Processors/Sources/NullSource.h>
#include <QueryPipeline/Pipe.h>
#include <Storages/Distributed/DistributedSettings.h>
#include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/Distributed/DistributedSettings.h>
#include <Storages/StorageSnapshot.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Planner/Utils.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Storages/getStructureOfRemoteTable.h>
namespace DB
{
@ -33,7 +38,6 @@ namespace ErrorCodes
{
extern const int TOO_LARGE_DISTRIBUTED_DEPTH;
extern const int LOGICAL_ERROR;
extern const int CLUSTER_DOESNT_EXIST;
extern const int UNEXPECTED_CLUSTER;
}
@ -172,7 +176,7 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster,
/// in case of parallel replicas with a custom key, use round-robin load balancing
/// so custom key partitions will be spread over nodes in round-robin fashion
if (context->canUseParallelReplicasCustomKey(cluster) && !settings.load_balancing.changed)
if (context->canUseParallelReplicasCustomKeyForCluster(cluster) && !settings.load_balancing.changed)
{
new_settings.load_balancing = LoadBalancing::ROUND_ROBIN;
}
@ -180,6 +184,10 @@ ContextMutablePtr updateSettingsAndClientInfoForCluster(const Cluster & cluster,
auto new_context = Context::createCopy(context);
new_context->setSettings(new_settings);
new_context->setClientInfo(new_client_info);
if (context->canUseParallelReplicasCustomKeyForCluster(cluster))
new_context->disableOffsetParallelReplicas();
return new_context;
}
@ -220,6 +228,35 @@ static ThrottlerPtr getThrottler(const ContextPtr & context)
return throttler;
}
AdditionalShardFilterGenerator
getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns)
{
if (!context->canUseParallelReplicasCustomKeyForCluster(cluster))
return {};
const auto & settings = context->getSettingsRef();
auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *context);
if (custom_key_ast == nullptr)
return {};
return [my_custom_key_ast = std::move(custom_key_ast),
column_description = columns,
custom_key_type = settings.parallel_replicas_custom_key_filter_type.value,
custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value,
custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value,
query_context = context,
replica_count = cluster.getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr
{
return getCustomKeyFilterForParallelReplica(
replica_count,
replica_num - 1,
my_custom_key_ast,
{custom_key_type, custom_key_range_lower, custom_key_range_upper},
column_description,
query_context);
};
}
void executeQuery(
QueryPlan & query_plan,
@ -412,14 +449,7 @@ void executeQueryWithParallelReplicas(
const auto & settings = context->getSettingsRef();
/// check cluster for parallel replicas
if (settings.cluster_for_parallel_replicas.value.empty())
{
throw Exception(
ErrorCodes::CLUSTER_DOESNT_EXIST,
"Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set "
"'cluster_for_parallel_replicas' setting");
}
auto not_optimized_cluster = context->getCluster(settings.cluster_for_parallel_replicas);
auto not_optimized_cluster = context->getClusterForParallelReplicas();
auto new_context = Context::createCopy(context);
@ -542,6 +572,84 @@ void executeQueryWithParallelReplicas(
executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits);
}
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
const SelectQueryInfo & query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const Block & header,
ContextPtr context)
{
/// Return directly (with correct header) if no shard to query.
if (query_info.getCluster()->getShardsInfo().empty())
{
if (context->getSettingsRef().allow_experimental_analyzer)
return;
Pipe pipe(std::make_shared<NullSource>(header));
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
read_from_pipe->setStepDescription("Read from NullSource (Distributed)");
query_plan.addStep(std::move(read_from_pipe));
return;
}
ColumnsDescriptionByShardNum columns_object;
if (hasDynamicSubcolumns(columns))
columns_object = getExtendedObjectsOfRemoteTables(*query_info.cluster, storage_id, columns, context);
ClusterProxy::SelectStreamFactory select_stream_factory
= ClusterProxy::SelectStreamFactory(header, columns_object, snapshot, processed_stage);
auto shard_filter_generator = getShardFilterGeneratorForCustomKey(*query_info.getCluster(), context, columns);
ClusterProxy::executeQuery(
query_plan,
header,
processed_stage,
storage_id,
/*table_func_ptr=*/nullptr,
select_stream_factory,
getLogger("executeQueryWithParallelReplicasCustomKey"),
context,
query_info,
/*sharding_key_expr=*/nullptr,
/*sharding_key_column_name=*/{},
/*distributed_settings=*/{},
shard_filter_generator,
/*is_remote_function=*/false);
}
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
const SelectQueryInfo & query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const QueryTreeNodePtr & query_tree,
ContextPtr context)
{
auto header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree, context, SelectQueryOptions(processed_stage).analyze());
executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context);
}
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
SelectQueryInfo query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const ASTPtr & query_ast,
ContextPtr context)
{
auto header = InterpreterSelectQuery(query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
query_info.query = ClusterProxy::rewriteSelectQuery(
context, query_info.query, storage_id.getDatabaseName(), storage_id.getTableName(), /*table_function_ptr=*/nullptr);
executeQueryWithParallelReplicasCustomKey(query_plan, storage_id, query_info, columns, snapshot, processed_stage, header, context);
}
}
}
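A hedged sketch of the user-facing shape of custom-key parallel replicas handled above; the table name and key expression are hypothetical, and the cluster is assumed to have a single shard with several replicas:

    SELECT count()
    FROM dist_events
    SETTINGS max_parallel_replicas = 3,
             parallel_replicas_custom_key = 'cityHash64(user_id)',
             parallel_replicas_custom_key_filter_type = 'default';  -- or 'range', bounded by the *_range_lower/_upper settings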

View File

@ -1,7 +1,7 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Core/QueryProcessingStage.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/IAST.h>
namespace DB
@ -13,6 +13,11 @@ class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
struct SelectQueryInfo;
class ColumnsDescription;
struct StorageSnapshot;
using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
class Pipe;
class QueryPlan;
@ -47,6 +52,9 @@ class SelectStreamFactory;
ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table);
using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
AdditionalShardFilterGenerator
getShardFilterGeneratorForCustomKey(const Cluster & cluster, ContextPtr context, const ColumnsDescription & columns);
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
/// `stream_factory` object encapsulates the logic of creating plans for a different type of query
/// (currently SELECT, DESCRIBE).
@ -91,6 +99,36 @@ void executeQueryWithParallelReplicas(
const PlannerContextPtr & planner_context,
ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits);
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
const SelectQueryInfo & query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const Block & header,
ContextPtr context);
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
const SelectQueryInfo & query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const QueryTreeNodePtr & query_tree,
ContextPtr context);
void executeQueryWithParallelReplicasCustomKey(
QueryPlan & query_plan,
const StorageID & storage_id,
SelectQueryInfo query_info,
const ColumnsDescription & columns,
const StorageSnapshotPtr & snapshot,
QueryProcessingStage::Enum processed_stage,
const ASTPtr & query_ast,
ContextPtr context);
}
}

View File

@ -58,6 +58,7 @@
#include <Access/EnabledRowPolicies.h>
#include <Access/QuotaUsage.h>
#include <Access/User.h>
#include <Access/Role.h>
#include <Access/SettingsProfile.h>
#include <Access/SettingsProfilesInfo.h>
#include <Access/SettingsConstraintsAndProfileIDs.h>
@ -190,6 +191,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int CLUSTER_DOESNT_EXIST;
extern const int SET_NON_GRANTED_ROLE;
}
#define SHUTDOWN(log, desc, ptr, method) do \
@ -1303,7 +1305,7 @@ ConfigurationPtr Context::getUsersConfig()
return shared->users_config;
}
void Context::setUser(const UUID & user_id_, const std::optional<const std::vector<UUID>> & current_roles_)
void Context::setUser(const UUID & user_id_)
{
/// Prepare lists of user's profiles, constraints, settings, roles.
/// NOTE: AccessControl::read<User>() and other AccessControl's functions may require some IO work,
@ -1312,8 +1314,8 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
auto & access_control = getAccessControl();
auto user = access_control.read<User>(user_id_);
auto new_current_roles = current_roles_ ? user->granted_roles.findGranted(*current_roles_) : user->granted_roles.findGranted(user->default_roles);
auto enabled_roles = access_control.getEnabledRolesInfo(new_current_roles, {});
auto default_roles = user->granted_roles.findGranted(user->default_roles);
auto enabled_roles = access_control.getEnabledRolesInfo(default_roles, {});
auto enabled_profiles = access_control.getEnabledSettingsInfo(user_id_, user->settings, enabled_roles->enabled_roles, enabled_roles->settings_from_enabled_roles);
const auto & database = user->default_database;
@ -1327,7 +1329,7 @@ void Context::setUser(const UUID & user_id_, const std::optional<const std::vect
/// so we shouldn't check constraints here.
setCurrentProfilesWithLock(*enabled_profiles, /* check_constraints= */ false, lock);
setCurrentRolesWithLock(new_current_roles, lock);
setCurrentRolesWithLock(default_roles, lock);
/// It's optional to specify the DEFAULT DATABASE in the user's definition.
if (!database.empty())
@ -1362,25 +1364,66 @@ std::optional<UUID> Context::getUserID() const
return user_id;
}
void Context::setCurrentRolesWithLock(const std::vector<UUID> & current_roles_, const std::lock_guard<ContextSharedMutex> &)
void Context::setCurrentRolesWithLock(const std::vector<UUID> & new_current_roles, const std::lock_guard<ContextSharedMutex> &)
{
if (current_roles_.empty())
if (new_current_roles.empty())
current_roles = nullptr;
else
current_roles = std::make_shared<std::vector<UUID>>(current_roles_);
current_roles = std::make_shared<std::vector<UUID>>(new_current_roles);
need_recalculate_access = true;
}
void Context::setCurrentRoles(const std::vector<UUID> & current_roles_)
void Context::setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user)
{
std::lock_guard lock(mutex);
setCurrentRolesWithLock(current_roles_, lock);
if (skip_if_not_granted)
{
auto filtered_role_ids = user->granted_roles.findGranted(new_current_roles);
std::lock_guard lock{mutex};
setCurrentRolesWithLock(filtered_role_ids, lock);
return;
}
if (throw_if_not_granted)
{
for (const auto & role_id : new_current_roles)
{
if (!user->granted_roles.isGranted(role_id))
{
auto role_name = getAccessControl().tryReadName(role_id);
throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to be set as a current role", role_name.value_or(toString(role_id)));
}
}
}
std::lock_guard lock2{mutex};
setCurrentRolesWithLock(new_current_roles, lock2);
}
void Context::setCurrentRoles(const std::vector<UUID> & new_current_roles, bool check_grants)
{
setCurrentRolesImpl(new_current_roles, /* throw_if_not_granted= */ check_grants, /* skip_if_not_granted= */ !check_grants, getUser());
}
void Context::setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants)
{
if (new_current_roles.all)
{
auto user = getUser();
setCurrentRolesImpl(user->granted_roles.findGranted(new_current_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user);
}
else
{
setCurrentRoles(new_current_roles.getMatchingIDs(), check_grants);
}
}
void Context::setCurrentRoles(const Strings & new_current_roles, bool check_grants)
{
setCurrentRoles(getAccessControl().getIDs<Role>(new_current_roles), check_grants);
}
void Context::setCurrentRolesDefault()
{
auto user = getUser();
setCurrentRoles(user->granted_roles.findGranted(user->default_roles));
setCurrentRolesImpl(user->granted_roles.findGranted(user->default_roles), /* throw_if_not_granted= */ false, /* skip_if_not_granted= */ false, user);
}
std::vector<UUID> Context::getCurrentRoles() const
@ -5475,10 +5518,37 @@ bool Context::canUseParallelReplicasOnFollower() const
return canUseTaskBasedParallelReplicas() && getClientInfo().collaborate_with_initiator;
}
bool Context::canUseParallelReplicasCustomKey(const Cluster & cluster) const
bool Context::canUseParallelReplicasCustomKey() const
{
return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY
&& cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1;
return settings.max_parallel_replicas > 1 && getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY;
}
bool Context::canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const
{
return canUseParallelReplicasCustomKey() && cluster.getShardCount() == 1 && cluster.getShardsInfo()[0].getAllNodeCount() > 1;
}
bool Context::canUseOffsetParallelReplicas() const
{
return offset_parallel_replicas_enabled && settings.max_parallel_replicas > 1
&& getParallelReplicasMode() != Context::ParallelReplicasMode::READ_TASKS;
}
void Context::disableOffsetParallelReplicas()
{
offset_parallel_replicas_enabled = false;
}
ClusterPtr Context::getClusterForParallelReplicas() const
{
/// check cluster for parallel replicas
if (settings.cluster_for_parallel_replicas.value.empty())
throw Exception(
ErrorCodes::CLUSTER_DOESNT_EXIST,
"Reading in parallel from replicas is enabled but cluster to execute query is not provided. Please set "
"'cluster_for_parallel_replicas' setting");
return getCluster(settings.cluster_for_parallel_replicas);
}
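getClusterForParallelReplicas centralizes a check that previously lived at the call sites; a minimal usage sketch (the cluster and table names are hypothetical, and the cluster must exist in the server configuration):

    SET allow_experimental_parallel_reading_from_replicas = 1;
    SET max_parallel_replicas = 3;
    SET cluster_for_parallel_replicas = 'my_replicated_cluster';  -- omitting this raises CLUSTER_DOESNT_EXIST
    SELECT count() FROM big_table;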
void Context::setPreparedSetsCache(const PreparedSetsCachePtr & cache)

View File

@ -61,6 +61,7 @@ class AccessFlags;
struct AccessRightsElement;
class AccessRightsElements;
enum class RowPolicyFilterType : uint8_t;
struct RolesOrUsersSet;
class EmbeddedDictionaries;
class ExternalDictionariesLoader;
class ExternalUserDefinedExecutableFunctionsLoader;
@ -150,6 +151,8 @@ class AsyncLoader;
struct TemporaryTableHolder;
using TemporaryTablesMapping = std::map<String, std::shared_ptr<TemporaryTableHolder>>;
using ClusterPtr = std::shared_ptr<Cluster>;
class LoadTask;
using LoadTaskPtr = std::shared_ptr<LoadTask>;
using LoadTaskPtrs = std::vector<LoadTaskPtr>;
@ -457,6 +460,11 @@ protected:
/// mutation tasks of one mutation executed against different parts of the same table.
PreparedSetsCachePtr prepared_sets_cache;
/// This is a mode of parallel replicas where we set parallel_replicas_count and parallel_replicas_offset
/// and generate specific filters on the replicas (e.g. when using parallel replicas with a sample key).
/// If we already use a different mode of parallel replicas, we want to disable this mode.
bool offset_parallel_replicas_enabled = true;
public:
/// Some counters for current query execution.
/// Most of them are workarounds and should be removed in the future.
@ -600,13 +608,15 @@ public:
/// Sets the current user assuming that he/she is already authenticated.
/// WARNING: This function doesn't check password!
void setUser(const UUID & user_id_, const std::optional<const std::vector<UUID>> & current_roles_ = {});
void setUser(const UUID & user_id_);
UserPtr getUser() const;
std::optional<UUID> getUserID() const;
String getUserName() const;
void setCurrentRoles(const std::vector<UUID> & current_roles_);
void setCurrentRoles(const Strings & new_current_roles, bool check_grants = true);
void setCurrentRoles(const std::vector<UUID> & new_current_roles, bool check_grants = true);
void setCurrentRoles(const RolesOrUsersSet & new_current_roles, bool check_grants = true);
void setCurrentRolesDefault();
std::vector<UUID> getCurrentRoles() const;
std::vector<UUID> getEnabledRoles() const;
@ -1309,7 +1319,13 @@ public:
bool canUseTaskBasedParallelReplicas() const;
bool canUseParallelReplicasOnInitiator() const;
bool canUseParallelReplicasOnFollower() const;
bool canUseParallelReplicasCustomKey(const Cluster & cluster) const;
bool canUseParallelReplicasCustomKey() const;
bool canUseParallelReplicasCustomKeyForCluster(const Cluster & cluster) const;
bool canUseOffsetParallelReplicas() const;
void disableOffsetParallelReplicas();
ClusterPtr getClusterForParallelReplicas() const;
enum class ParallelReplicasMode : uint8_t
{
@ -1334,7 +1350,7 @@ private:
void setCurrentProfilesWithLock(const SettingsProfilesInfo & profiles_info, bool check_constraints, const std::lock_guard<ContextSharedMutex> & lock);
void setCurrentRolesWithLock(const std::vector<UUID> & current_roles_, const std::lock_guard<ContextSharedMutex> & lock);
void setCurrentRolesWithLock(const std::vector<UUID> & new_current_roles, const std::lock_guard<ContextSharedMutex> & lock);
void setSettingWithLock(std::string_view name, const String & value, const std::lock_guard<ContextSharedMutex> & lock);
@ -1367,6 +1383,7 @@ private:
void initGlobal();
void setUserID(const UUID & user_id_);
void setCurrentRolesImpl(const std::vector<UUID> & new_current_roles, bool throw_if_not_granted, bool skip_if_not_granted, const std::shared_ptr<const User> & user);
template <typename... Args>
void checkAccessImpl(const Args &... args) const;

View File

@ -566,7 +566,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
settings.additional_table_filters, joined_tables.tablesWithColumns().front().table, *context);
ASTPtr parallel_replicas_custom_filter_ast = nullptr;
if (storage && context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY && !joined_tables.tablesWithColumns().empty())
if (storage && context->canUseParallelReplicasCustomKey() && !joined_tables.tablesWithColumns().empty())
{
if (settings.parallel_replicas_count > 1)
{
@ -587,16 +587,28 @@ InterpreterSelectQuery::InterpreterSelectQuery(
else if (settings.parallel_replica_offset > 0)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Parallel replicas processing with custom_key has been requested "
"(setting 'max_parallel_replicas') but the table does not have custom_key defined for it "
"or it's invalid (settings `parallel_replicas_custom_key`)");
ErrorCodes::BAD_ARGUMENTS,
"Parallel replicas processing with custom_key has been requested "
"(setting 'max_parallel_replicas') but the table does not have custom_key defined for it "
"or it's invalid (settings `parallel_replicas_custom_key`)");
}
}
/// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan
/// instead of executing the query with multiple replicas
/// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a
/// local plan and remote replicas
else if (auto * distributed = dynamic_cast<StorageDistributed *>(storage.get());
distributed && context->canUseParallelReplicasCustomKey(*distributed->getCluster()))
distributed && context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster()))
{
context->setSetting("distributed_group_by_no_merge", 2);
context->setSetting("prefer_localhost_replica", Field(0));
}
else if (
storage->isMergeTree() && (storage->supportsReplication() || settings.parallel_replicas_for_non_replicated_merge_tree)
&& context->getClientInfo().distributed_depth == 0
&& context->canUseParallelReplicasCustomKeyForCluster(*context->getClusterForParallelReplicas()))
{
context->setSetting("prefer_localhost_replica", Field(0));
}
}

View File

@ -233,7 +233,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_)
{
/// Attach or init current thread to thread group and copy useful information from it
thread_group = thread_group_;
thread_group->linkThread(thread_id);
if (!internal_thread)
thread_group->linkThread(thread_id);
performance_counters.setParent(&thread_group->performance_counters);
memory_tracker.setParent(&thread_group->memory_tracker);
@ -269,7 +270,8 @@ void ThreadStatus::detachFromGroup()
/// Extract MemoryTracker out from query and user context
memory_tracker.setParent(&total_memory_tracker);
thread_group->unlinkThread();
if (!internal_thread)
thread_group->unlinkThread();
thread_group.reset();

View File

@ -834,7 +834,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
if (row_policy_filter_info.actions)
table_expression_data.setRowLevelFilterActions(row_policy_filter_info.actions);
if (query_context->getParallelReplicasMode() == Context::ParallelReplicasMode::CUSTOM_KEY)
if (query_context->canUseParallelReplicasCustomKey())
{
if (settings.parallel_replicas_count > 1)
{
@ -843,9 +843,14 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
add_filter(parallel_replicas_custom_key_filter_info, "Parallel replicas custom key filter");
}
else if (auto * distributed = typeid_cast<StorageDistributed *>(storage.get());
distributed && query_context->canUseParallelReplicasCustomKey(*distributed->getCluster()))
distributed && query_context->canUseParallelReplicasCustomKeyForCluster(*distributed->getCluster()))
{
planner_context->getMutableQueryContext()->setSetting("distributed_group_by_no_merge", 2);
/// We disable prefer_localhost_replica because if one of the replicas is local it will create a single local plan
/// instead of executing the query with multiple replicas
/// We can enable this setting again for custom key parallel replicas when we can generate a plan that will use both a
/// local plan and remote replicas
planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0});
}
}
@ -879,7 +884,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
};
/// query_plan can be empty if there is nothing to read
if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator())
if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings))
{
// (1) find read step
QueryPlan::Node * node = query_plan.getRootNode();
@ -906,54 +911,78 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
}
chassert(reading);
// (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read
if (settings.parallel_replicas_min_number_of_rows_per_replica > 0)
if (query_context->canUseParallelReplicasCustomKey() && query_context->getClientInfo().distributed_depth == 0)
{
auto result_ptr = reading->selectRangesToRead();
UInt64 rows_to_read = result_ptr->selected_rows;
if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read)
rows_to_read = table_expression_query_info.trivial_limit;
if (max_block_size_limited && (max_block_size_limited < rows_to_read))
rows_to_read = max_block_size_limited;
const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
LOG_TRACE(
getLogger("Planner"),
"Estimated {} rows to read. It is enough work for {} parallel replicas",
rows_to_read,
number_of_replicas_to_use);
if (number_of_replicas_to_use <= 1)
if (auto cluster = query_context->getClusterForParallelReplicas();
query_context->canUseParallelReplicasCustomKeyForCluster(*cluster))
{
planner_context->getMutableQueryContext()->setSetting(
"allow_experimental_parallel_reading_from_replicas", Field(0));
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1});
LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read");
}
else if (number_of_replicas_to_use < settings.max_parallel_replicas)
{
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use);
LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use);
planner_context->getMutableQueryContext()->setSetting("prefer_localhost_replica", Field{0});
auto modified_query_info = select_query_info;
modified_query_info.cluster = std::move(cluster);
from_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
QueryPlan query_plan_parallel_replicas;
ClusterProxy::executeQueryWithParallelReplicasCustomKey(
query_plan_parallel_replicas,
storage->getStorageID(),
modified_query_info,
storage->getInMemoryMetadataPtr()->getColumns(),
storage_snapshot,
from_stage,
table_expression_query_info.query_tree,
query_context);
query_plan = std::move(query_plan_parallel_replicas);
}
}
// (3) if parallel replicas still enabled - replace reading step
if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator())
else if (query_context->canUseParallelReplicasOnInitiator())
{
from_stage = QueryProcessingStage::WithMergeableState;
QueryPlan query_plan_parallel_replicas;
ClusterProxy::executeQueryWithParallelReplicas(
query_plan_parallel_replicas,
storage->getStorageID(),
from_stage,
table_expression_query_info.query_tree,
table_expression_query_info.planner_context,
query_context,
table_expression_query_info.storage_limits);
query_plan = std::move(query_plan_parallel_replicas);
// (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read
if (settings.parallel_replicas_min_number_of_rows_per_replica > 0)
{
auto result_ptr = reading->selectRangesToRead();
UInt64 rows_to_read = result_ptr->selected_rows;
if (table_expression_query_info.trivial_limit > 0 && table_expression_query_info.trivial_limit < rows_to_read)
rows_to_read = table_expression_query_info.trivial_limit;
if (max_block_size_limited && (max_block_size_limited < rows_to_read))
rows_to_read = max_block_size_limited;
const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
LOG_TRACE(
getLogger("Planner"),
"Estimated {} rows to read. It is enough work for {} parallel replicas",
rows_to_read,
number_of_replicas_to_use);
if (number_of_replicas_to_use <= 1)
{
planner_context->getMutableQueryContext()->setSetting(
"allow_experimental_parallel_reading_from_replicas", Field(0));
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1});
LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read");
}
else if (number_of_replicas_to_use < settings.max_parallel_replicas)
{
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use);
LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use);
}
}
// (3) if parallel replicas still enabled - replace reading step
if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator())
{
from_stage = QueryProcessingStage::WithMergeableState;
QueryPlan query_plan_parallel_replicas;
ClusterProxy::executeQueryWithParallelReplicas(
query_plan_parallel_replicas,
storage->getStorageID(),
from_stage,
table_expression_query_info.query_tree,
table_expression_query_info.planner_context,
query_context,
table_expression_query_info.storage_limits);
query_plan = std::move(query_plan_parallel_replicas);
}
}
}
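A hedged sketch of the setting that drives the replica-count reduction in branch (2) above; the table name is hypothetical:

    SELECT sum(value)
    FROM big_table
    SETTINGS allow_experimental_parallel_reading_from_replicas = 1,
             max_parallel_replicas = 8,
             parallel_replicas_min_number_of_rows_per_replica = 1000000;
    -- if index analysis estimates e.g. 3 million rows to read, only 3 replicas are used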

View File

@ -900,6 +900,11 @@ bool NativeORCBlockInputFormat::prepareStripeReader()
orc::RowReaderOptions row_reader_options;
row_reader_options.includeTypes(include_indices);
if (format_settings.orc.read_use_writer_time_zone)
{
String writer_time_zone = current_stripe_info->getWriterTimezone();
row_reader_options.setTimezoneName(writer_time_zone);
}
row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength());
if (format_settings.orc.filter_push_down && sarg)
{

View File

@ -0,0 +1,85 @@
#include <Processors/QueryPlan/BufferChunksTransform.h>
namespace DB
{
BufferChunksTransform::BufferChunksTransform(
const Block & header_,
size_t max_rows_to_buffer_,
size_t max_bytes_to_buffer_,
size_t limit_)
: IProcessor({header_}, {header_})
, input(inputs.front())
, output(outputs.front())
, max_rows_to_buffer(max_rows_to_buffer_)
, max_bytes_to_buffer(max_bytes_to_buffer_)
, limit(limit_)
{
}
IProcessor::Status BufferChunksTransform::prepare()
{
if (output.isFinished())
{
chunks = {};
input.close();
return Status::Finished;
}
if (input.isFinished() && chunks.empty())
{
output.finish();
return Status::Finished;
}
if (output.canPush())
{
input.setNeeded();
if (!chunks.empty())
{
auto chunk = std::move(chunks.front());
chunks.pop();
num_buffered_rows -= chunk.getNumRows();
num_buffered_bytes -= chunk.bytes();
output.push(std::move(chunk));
}
else if (input.hasData())
{
auto chunk = pullChunk();
output.push(std::move(chunk));
}
}
if (input.hasData() && (num_buffered_rows < max_rows_to_buffer || num_buffered_bytes < max_bytes_to_buffer))
{
auto chunk = pullChunk();
num_buffered_rows += chunk.getNumRows();
num_buffered_bytes += chunk.bytes();
chunks.push(std::move(chunk));
}
if (num_buffered_rows >= max_rows_to_buffer && num_buffered_bytes >= max_bytes_to_buffer)
{
input.setNotNeeded();
return Status::PortFull;
}
input.setNeeded();
return Status::NeedData;
}
Chunk BufferChunksTransform::pullChunk()
{
auto chunk = input.pull();
num_processed_rows += chunk.getNumRows();
if (limit && num_processed_rows >= limit)
input.close();
return chunk;
}
}

View File

@ -0,0 +1,42 @@
#pragma once
#include <Processors/IProcessor.h>
#include <queue>
namespace DB
{
/// Transform that buffers chunks from the input
/// up to a certain limit and pushes chunks to
/// the output whenever the output is ready. It can be used
/// to increase parallelism of execution, for example
/// when it is added before MergingSortedTransform.
class BufferChunksTransform : public IProcessor
{
public:
/// An OR condition is used for the limits on rows and bytes: chunks are buffered while either limit has not been reached.
BufferChunksTransform(
const Block & header_,
size_t max_rows_to_buffer_,
size_t max_bytes_to_buffer_,
size_t limit_);
Status prepare() override;
String getName() const override { return "BufferChunks"; }
private:
Chunk pullChunk();
InputPort & input;
OutputPort & output;
size_t max_rows_to_buffer;
size_t max_bytes_to_buffer;
size_t limit;
std::queue<Chunk> chunks;
size_t num_buffered_rows = 0;
size_t num_buffered_bytes = 0;
size_t num_processed_rows = 0;
};
}
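A simplified, standalone model of the buffering policy described in the comment above (the OR condition on rows and bytes): chunks are accepted while either limit has not been reached, and the buffer counts as full only when both limits are exceeded. The Chunk struct and class name here are illustrative stand-ins; the real transform is the IProcessor shown above.

#include <cstddef>
#include <queue>

struct Chunk { size_t rows = 0; size_t bytes = 0; };

class ChunkBuffer
{
public:
    ChunkBuffer(size_t max_rows_, size_t max_bytes_) : max_rows(max_rows_), max_bytes(max_bytes_) {}

    /// Mirrors the check in BufferChunksTransform::prepare(): keep buffering while
    /// either the row limit or the byte limit has not been reached yet.
    bool canBuffer() const { return buffered_rows < max_rows || buffered_bytes < max_bytes; }

    void push(Chunk chunk)
    {
        buffered_rows += chunk.rows;
        buffered_bytes += chunk.bytes;
        chunks.push(chunk);
    }

    Chunk pop()
    {
        Chunk chunk = chunks.front();
        chunks.pop();
        buffered_rows -= chunk.rows;
        buffered_bytes -= chunk.bytes;
        return chunk;
    }

    bool empty() const { return chunks.empty(); }

private:
    size_t max_rows;
    size_t max_bytes;
    size_t buffered_rows = 0;
    size_t buffered_bytes = 0;
    std::queue<Chunk> chunks;
};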

View File

@ -919,15 +919,23 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
{
auto & union_node = node.children.front();
std::vector<InputOrderInfoPtr> infos;
bool use_buffering = false;
const SortDescription * max_sort_descr = nullptr;
std::vector<InputOrderInfoPtr> infos;
infos.reserve(node.children.size());
for (auto * child : union_node->children)
{
infos.push_back(buildInputOrderInfo(*sorting, *child, steps_to_update));
if (infos.back() && (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size()))
max_sort_descr = &infos.back()->sort_description_for_merging;
if (infos.back())
{
if (!max_sort_descr || max_sort_descr->size() < infos.back()->sort_description_for_merging.size())
max_sort_descr = &infos.back()->sort_description_for_merging;
use_buffering |= infos.back()->limit == 0;
}
}
if (!max_sort_descr || max_sort_descr->empty())
@ -972,12 +980,13 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
}
}
sorting->convertToFinishSorting(*max_sort_descr);
sorting->convertToFinishSorting(*max_sort_descr, use_buffering);
}
else if (auto order_info = buildInputOrderInfo(*sorting, *node.children.front(), steps_to_update))
{
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
/// update data stream's sorting properties
/// Use buffering only if there is a filter or there is no limit.
bool use_buffering = order_info->limit == 0;
sorting->convertToFinishSorting(order_info->sort_description_for_merging, use_buffering);
updateStepsDataStreams(steps_to_update);
}
}
@ -1091,7 +1100,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
bool can_read = read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
if (!can_read)
return 0;
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
sorting->convertToFinishSorting(order_info->sort_description_for_merging, false);
}
return 0;

View File

@ -1,5 +1,4 @@
#include <memory>
#include <stdexcept>
#include <IO/Operators.h>
#include <Interpreters/Context.h>
#include <Processors/Merges/MergingSortedTransform.h>
@ -8,6 +7,7 @@
#include <Processors/Transforms/LimitsCheckingTransform.h>
#include <Processors/Transforms/MergeSortingTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/QueryPlan/BufferChunksTransform.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Common/JSONBuilder.h>
@ -38,6 +38,7 @@ SortingStep::Settings::Settings(const Context & context)
tmp_data = context.getTempDataOnDisk();
min_free_disk_space = settings.min_free_disk_space_for_temporary_data;
max_block_bytes = settings.prefer_external_sort_block_bytes;
read_in_order_use_buffering = settings.read_in_order_use_buffering;
}
SortingStep::Settings::Settings(size_t max_block_size_)
@ -153,10 +154,11 @@ void SortingStep::updateLimit(size_t limit_)
}
}
void SortingStep::convertToFinishSorting(SortDescription prefix_description_)
void SortingStep::convertToFinishSorting(SortDescription prefix_description_, bool use_buffering_)
{
type = Type::FinishSorting;
prefix_description = std::move(prefix_description_);
use_buffering = use_buffering_;
}
void SortingStep::scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline)
@ -244,6 +246,14 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr
/// If there are several streams, then we merge them into one
if (pipeline.getNumStreams() > 1)
{
if (use_buffering && sort_settings.read_in_order_use_buffering)
{
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<BufferChunksTransform>(header, sort_settings.max_block_size, sort_settings.max_block_bytes, limit_);
});
}
auto transform = std::make_shared<MergingSortedTransform>(
pipeline.getHeader(),
pipeline.getNumStreams(),
@ -373,9 +383,8 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build
mergingSorted(pipeline, prefix_description, (need_finish_sorting ? 0 : limit));
if (need_finish_sorting)
{
finishSorting(pipeline, prefix_description, result_description, limit);
}
return;
}

View File

@ -28,6 +28,7 @@ public:
TemporaryDataOnDiskScopePtr tmp_data = nullptr;
size_t min_free_disk_space = 0;
size_t max_block_bytes = 0;
size_t read_in_order_use_buffering = 0;
explicit Settings(const Context & context);
explicit Settings(size_t max_block_size_);
@ -80,7 +81,7 @@ public:
const SortDescription & getSortDescription() const { return result_description; }
void convertToFinishSorting(SortDescription prefix_description);
void convertToFinishSorting(SortDescription prefix_description, bool use_buffering_);
Type getType() const { return type; }
const Settings & getSettings() const { return sort_settings; }
@ -126,6 +127,7 @@ private:
UInt64 limit;
bool always_read_till_end = false;
bool use_buffering = false;
Settings sort_settings;

View File

@ -105,7 +105,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
connection_entries.emplace_back(std::move(result.entry));
}
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), context, throttler);
if (extension_ && extension_->replica_info)
res->setReplicaInfo(*extension_->replica_info);
@ -127,7 +127,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
{
create_connections = [this, &connection, throttler, extension_](AsyncCallback)
{
auto res = std::make_unique<MultiplexedConnections>(connection, context->getSettingsRef(), throttler);
auto res = std::make_unique<MultiplexedConnections>(connection, context, throttler);
if (extension_ && extension_->replica_info)
res->setReplicaInfo(*extension_->replica_info);
return res;
@ -148,7 +148,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
{
create_connections = [this, connection_ptr, throttler, extension_](AsyncCallback)
{
auto res = std::make_unique<MultiplexedConnections>(connection_ptr, context->getSettingsRef(), throttler);
auto res = std::make_unique<MultiplexedConnections>(connection_ptr, context, throttler);
if (extension_ && extension_->replica_info)
res->setReplicaInfo(*extension_->replica_info);
return res;
@ -169,7 +169,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
{
create_connections = [this, connections_, throttler, extension_](AsyncCallback) mutable
{
auto res = std::make_unique<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
auto res = std::make_unique<MultiplexedConnections>(std::move(connections_), context, throttler);
if (extension_ && extension_->replica_info)
res->setReplicaInfo(*extension_->replica_info);
return res;
@ -234,7 +234,7 @@ RemoteQueryExecutor::RemoteQueryExecutor(
timeouts, current_settings, pool_mode, std::move(async_callback), skip_unavailable_endpoints, priority_func);
}
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), current_settings, throttler);
auto res = std::make_unique<MultiplexedConnections>(std::move(connection_entries), context, throttler);
if (extension && extension->replica_info)
res->setReplicaInfo(*extension->replica_info);
return res;

View File

@ -0,0 +1,265 @@
#include <Server/HTTP/authenticateUserByHTTP.h>
#include <Access/Authentication.h>
#include <Access/Common/SSLCertificateSubjects.h>
#include <Access/Credentials.h>
#include <Access/ExternalAuthenticators.h>
#include <Common/Base64.h>
#include <Server/HTTP/HTTPServerRequest.h>
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/HTTPServerResponse.h>
#include <Interpreters/Context.h>
#include <Interpreters/Session.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#if USE_SSL
#include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int AUTHENTICATION_FAILED;
extern const int BAD_ARGUMENTS;
extern const int SUPPORT_IS_DISABLED;
}
namespace
{
/// Throws an exception saying that multiple authorization schemes are used simultaneously.
[[noreturn]] void throwMultipleAuthenticationMethods(std::string_view method1, std::string_view method2)
{
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed to use {} and {} simultaneously", method1, method2);
}
/// Checks that a specified user name is not empty, and throws an exception if it's empty.
void checkUserNameNotEmpty(const String & user_name, std::string_view method)
{
if (user_name.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Got an empty user name from {}", method);
}
}
bool authenticateUserByHTTP(
const HTTPServerRequest & request,
const HTMLForm & params,
HTTPServerResponse & response,
Session & session,
std::unique_ptr<Credentials> & request_credentials,
ContextPtr global_context,
LoggerPtr log)
{
/// Get the credentials created by the previous call of authenticateUserByHTTP() while handling the previous HTTP request.
auto current_credentials = std::move(request_credentials);
/// The user and password can be passed by headers (similar to X-Auth-*),
/// which is used by load balancers to pass authentication information.
std::string user = request.get("X-ClickHouse-User", "");
std::string password = request.get("X-ClickHouse-Key", "");
std::string quota_key = request.get("X-ClickHouse-Quota", "");
bool has_auth_headers = !user.empty() || !password.empty();
/// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name
/// extracted from the SSL certificate used for this connection instead of checking password.
bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on");
/// User name and password can be passed using HTTP Basic auth or query parameters
/// (both methods are insecure).
bool has_http_credentials = request.hasCredentials();
bool has_credentials_in_query_params = params.has("user") || params.has("password");
std::string spnego_challenge;
SSLCertificateSubjects certificate_subjects;
if (has_ssl_certificate_auth)
{
#if USE_SSL
/// For SSL certificate authentication we extract the user name from the "X-ClickHouse-User" HTTP header.
checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers");
/// It is prohibited to mix different authorization schemes.
if (!password.empty())
throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via password");
if (has_http_credentials)
throwMultipleAuthenticationMethods("SSL certificate authentication", "Authorization HTTP header");
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("SSL certificate authentication", "authentication via parameters");
if (request.havePeerCertificate())
certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate());
if (certificate_subjects.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"SSL certificate authentication disabled because ClickHouse was built without SSL library");
#endif
}
else if (has_auth_headers)
{
checkUserNameNotEmpty(user, "X-ClickHouse HTTP headers");
/// It is prohibited to mix different authorization schemes.
if (has_http_credentials)
throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "Authorization HTTP header");
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("X-ClickHouse HTTP headers", "authentication via parameters");
}
else if (has_http_credentials)
{
/// It is prohibited to mix different authorization schemes.
if (has_credentials_in_query_params)
throwMultipleAuthenticationMethods("Authorization HTTP header", "authentication via parameters");
std::string scheme;
std::string auth_info;
request.getCredentials(scheme, auth_info);
if (Poco::icompare(scheme, "Basic") == 0)
{
Poco::Net::HTTPBasicCredentials credentials(auth_info);
user = credentials.getUsername();
password = credentials.getPassword();
checkUserNameNotEmpty(user, "Authorization HTTP header");
}
else if (Poco::icompare(scheme, "Negotiate") == 0)
{
spnego_challenge = auth_info;
if (spnego_challenge.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty");
}
else
{
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme);
}
}
else
{
/// If the user name is not set we assume it's the 'default' user.
user = params.get("user", "default");
password = params.get("password", "");
checkUserNameNotEmpty(user, "authentication via parameters");
}
if (!certificate_subjects.empty())
{
chassert(!user.empty());
if (!current_credentials)
current_credentials = std::make_unique<SSLCertificateCredentials>(user, std::move(certificate_subjects));
auto * certificate_credentials = dynamic_cast<SSLCertificateCredentials *>(current_credentials.get());
if (!certificate_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme");
}
else if (!spnego_challenge.empty())
{
if (!current_credentials)
current_credentials = global_context->makeGSSAcceptorContext();
auto * gss_acceptor_context = dynamic_cast<GSSAcceptorContext *>(current_credentials.get());
if (!gss_acceptor_context)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Negotiate' HTTP Authorization scheme");
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log));
#pragma clang diagnostic pop
if (!spnego_response.empty())
response.set("WWW-Authenticate", "Negotiate " + spnego_response);
if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady())
{
if (spnego_response.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
}
else // I.e., now using user name and password strings ("Basic").
{
if (!current_credentials)
current_credentials = std::make_unique<BasicCredentials>();
auto * basic_credentials = dynamic_cast<BasicCredentials *>(current_credentials.get());
if (!basic_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme");
chassert(!user.empty());
basic_credentials->setUserName(user);
basic_credentials->setPassword(password);
}
if (params.has("quota_key"))
{
if (!quota_key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid authentication: it is not allowed "
"to use quota key as HTTP header and as parameter simultaneously");
quota_key = params.get("quota_key");
}
/// Set client info. It will be used for quota accounting parameters in 'setUser' method.
session.setHTTPClientInfo(request);
session.setQuotaClientKey(quota_key);
/// Extract the last entry from the comma-separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = session.getClientInfo().getLastForwardedFor();
try
{
if (!forwarded_address.empty() && global_context->getConfigRef().getBool("auth_use_forwarded_address", false))
session.authenticate(*current_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port()));
else
session.authenticate(*current_credentials, request.clientAddress());
}
catch (const Authentication::Require<BasicCredentials> & required_credentials)
{
current_credentials = std::make_unique<BasicCredentials>();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Basic");
else
response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
catch (const Authentication::Require<GSSAcceptorContext> & required_credentials)
{
current_credentials = global_context->makeGSSAcceptorContext();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Negotiate");
else
response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
/// Keep the credentials for next HTTP request. A client can handle HTTP_UNAUTHORIZED and send us more credentials with the next HTTP request.
request_credentials = std::move(current_credentials);
return false;
}
return true;
}
}
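A tiny standalone model of the "do not mix authentication schemes" rule enforced above. The boolean parameters mirror the flags computed in authenticateUserByHTTP(); the function itself and its use of std::runtime_error are illustrative, not the real DB::Exception-based code.

#include <stdexcept>
#include <string>

void checkSingleAuthScheme(bool has_auth_headers, bool has_ssl_certificate_auth,
                           bool has_http_credentials, bool has_credentials_in_query_params)
{
    auto fail = [](const std::string & a, const std::string & b)
    {
        throw std::runtime_error("Invalid authentication: it is not allowed to use " + a + " and " + b + " simultaneously");
    };

    if (has_ssl_certificate_auth)
    {
        if (has_http_credentials)
            fail("SSL certificate authentication", "Authorization HTTP header");
        if (has_credentials_in_query_params)
            fail("SSL certificate authentication", "authentication via parameters");
    }
    else if (has_auth_headers)
    {
        if (has_http_credentials)
            fail("X-ClickHouse HTTP headers", "Authorization HTTP header");
        if (has_credentials_in_query_params)
            fail("X-ClickHouse HTTP headers", "authentication via parameters");
    }
    else if (has_http_credentials && has_credentials_in_query_params)
    {
        fail("Authorization HTTP header", "authentication via parameters");
    }
}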

View File

@ -0,0 +1,31 @@
#pragma once
#include <Common/logger_useful.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
class HTTPServerRequest;
class HTMLForm;
class HTTPServerResponse;
class Session;
class Credentials;
/// Authenticates a user via HTTP protocol and initializes a session.
/// Usually retrieves the name and the password for that user from either the request's headers or from the query parameters.
/// Returns true when the user is successfully authenticated; in that case the session instance
/// is configured accordingly and the request_credentials instance is dropped.
/// Returns false when the user is not authenticated yet; in that case the HTTP_UNAUTHORIZED response
/// is sent with the "WWW-Authenticate" header, and the `request_credentials` instance must be
/// preserved until the next request or until an exception is thrown.
/// Throws an exception if authentication failed.
bool authenticateUserByHTTP(
const HTTPServerRequest & request,
const HTMLForm & params,
HTTPServerResponse & response,
Session & session,
std::unique_ptr<Credentials> & request_credentials,
ContextPtr global_context,
LoggerPtr log);
}

View File

@ -0,0 +1,158 @@
#include <Server/HTTP/exceptionCodeToHTTPStatus.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_PARSE_TEXT;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_PARSE_NUMBER;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
extern const int CANNOT_PARSE_IPV4;
extern const int CANNOT_PARSE_IPV6;
extern const int CANNOT_PARSE_UUID;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_SCHEDULE_TASK;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_COMPILE_REGEXP;
extern const int DUPLICATE_COLUMN;
extern const int ILLEGAL_COLUMN;
extern const int THERE_IS_NO_COLUMN;
extern const int UNKNOWN_ELEMENT_IN_AST;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
extern const int TOO_DEEP_AST;
extern const int TOO_BIG_AST;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int SYNTAX_ERROR;
extern const int INCORRECT_DATA;
extern const int TYPE_MISMATCH;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_FUNCTION;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_TYPE;
extern const int UNKNOWN_STORAGE;
extern const int UNKNOWN_DATABASE;
extern const int UNKNOWN_SETTING;
extern const int UNKNOWN_DIRECTION_OF_SORTING;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int UNKNOWN_FORMAT;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int UNKNOWN_ROLE;
extern const int QUERY_IS_TOO_LARGE;
extern const int NOT_IMPLEMENTED;
extern const int SOCKET_TIMEOUT;
extern const int UNKNOWN_USER;
extern const int WRONG_PASSWORD;
extern const int REQUIRED_PASSWORD;
extern const int AUTHENTICATION_FAILED;
extern const int SET_NON_GRANTED_ROLE;
extern const int HTTP_LENGTH_REQUIRED;
extern const int TIMEOUT_EXCEEDED;
}
Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code)
{
using namespace Poco::Net;
if (exception_code == ErrorCodes::REQUIRED_PASSWORD)
{
return HTTPResponse::HTTP_UNAUTHORIZED;
}
else if (exception_code == ErrorCodes::UNKNOWN_USER ||
exception_code == ErrorCodes::WRONG_PASSWORD ||
exception_code == ErrorCodes::AUTHENTICATION_FAILED ||
exception_code == ErrorCodes::SET_NON_GRANTED_ROLE)
{
return HTTPResponse::HTTP_FORBIDDEN;
}
else if (exception_code == ErrorCodes::BAD_ARGUMENTS ||
exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP ||
exception_code == ErrorCodes::CANNOT_PARSE_TEXT ||
exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE ||
exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING ||
exception_code == ErrorCodes::CANNOT_PARSE_DATE ||
exception_code == ErrorCodes::CANNOT_PARSE_DATETIME ||
exception_code == ErrorCodes::CANNOT_PARSE_NUMBER ||
exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING ||
exception_code == ErrorCodes::CANNOT_PARSE_IPV4 ||
exception_code == ErrorCodes::CANNOT_PARSE_IPV6 ||
exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED ||
exception_code == ErrorCodes::CANNOT_PARSE_UUID ||
exception_code == ErrorCodes::DUPLICATE_COLUMN ||
exception_code == ErrorCodes::ILLEGAL_COLUMN ||
exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST ||
exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE ||
exception_code == ErrorCodes::THERE_IS_NO_COLUMN ||
exception_code == ErrorCodes::TOO_DEEP_AST ||
exception_code == ErrorCodes::TOO_BIG_AST ||
exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE ||
exception_code == ErrorCodes::SYNTAX_ERROR ||
exception_code == ErrorCodes::INCORRECT_DATA ||
exception_code == ErrorCodes::TYPE_MISMATCH ||
exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE)
{
return HTTPResponse::HTTP_BAD_REQUEST;
}
else if (exception_code == ErrorCodes::UNKNOWN_TABLE ||
exception_code == ErrorCodes::UNKNOWN_FUNCTION ||
exception_code == ErrorCodes::UNKNOWN_IDENTIFIER ||
exception_code == ErrorCodes::UNKNOWN_TYPE ||
exception_code == ErrorCodes::UNKNOWN_STORAGE ||
exception_code == ErrorCodes::UNKNOWN_DATABASE ||
exception_code == ErrorCodes::UNKNOWN_SETTING ||
exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING ||
exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION ||
exception_code == ErrorCodes::UNKNOWN_FORMAT ||
exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE ||
exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY ||
exception_code == ErrorCodes::UNKNOWN_ROLE)
{
return HTTPResponse::HTTP_NOT_FOUND;
}
else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE)
{
return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE;
}
else if (exception_code == ErrorCodes::NOT_IMPLEMENTED)
{
return HTTPResponse::HTTP_NOT_IMPLEMENTED;
}
else if (exception_code == ErrorCodes::SOCKET_TIMEOUT ||
exception_code == ErrorCodes::CANNOT_OPEN_FILE)
{
return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;
}
else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED)
{
return HTTPResponse::HTTP_LENGTH_REQUIRED;
}
else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED)
{
return HTTPResponse::HTTP_REQUEST_TIMEOUT;
}
else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK)
{
return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;
}
return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR;
}
}

View File

@ -0,0 +1,11 @@
#pragma once
#include <Poco/Net/HTTPResponse.h>
namespace DB
{
/// Converts an exception code to an HTTP status code.
Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code);
}

View File

@ -0,0 +1,80 @@
#include <Server/HTTP/sendExceptionToHTTPClient.h>
#include <Server/HTTP/HTTPServerRequest.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <Server/HTTP/exceptionCodeToHTTPStatus.h>
namespace DB
{
namespace ErrorCodes
{
extern const int HTTP_LENGTH_REQUIRED;
extern const int REQUIRED_PASSWORD;
}
void sendExceptionToHTTPClient(
const String & exception_message,
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
WriteBufferFromHTTPServerResponse * out,
LoggerPtr log)
{
setHTTPResponseStatusAndHeadersForException(exception_code, request, response, out, log);
if (!out)
{
/// If nothing was sent yet.
WriteBufferFromHTTPServerResponse out_for_message{response, request.getMethod() == HTTPRequest::HTTP_HEAD, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT};
out_for_message.writeln(exception_message);
out_for_message.finalize();
}
else
{
/// If the buffer has data that wasn't sent to the client yet, there is no need to send it; it is replaced with the exception message.
bool data_sent = (out->count() != out->offset());
if (!data_sent)
out->position() = out->buffer().begin();
out->writeln(exception_message);
out->finalize();
}
}
void setHTTPResponseStatusAndHeadersForException(
int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log)
{
if (out)
out->setExceptionCode(exception_code);
else
response.set("X-ClickHouse-Exception-Code", toString<int>(exception_code));
/// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body
/// to avoid reading part of the current request body in the next request.
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive()
&& exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
{
try
{
if (!request.getStream().eof())
request.getStream().ignoreAll();
}
catch (...)
{
tryLogCurrentException(log, "Cannot read remaining request body during exception handling");
response.setKeepAlive(false);
}
}
if (exception_code == ErrorCodes::REQUIRED_PASSWORD)
response.requireAuthentication("ClickHouse server HTTP API");
else
response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code));
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Common/logger_useful.h>
#include <base/types.h>
namespace DB
{
class HTTPServerRequest;
class HTTPServerResponse;
class WriteBufferFromHTTPServerResponse;
/// Sends an exception to the HTTP client. This function doesn't handle its own exceptions, so it needs to be wrapped in a try/catch block.
/// Argument `out` may be either created from `response` or be nullptr (if it wasn't created before the exception).
void sendExceptionToHTTPClient(
const String & exception_message,
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
WriteBufferFromHTTPServerResponse * out,
LoggerPtr log);
/// Sets the "X-ClickHouse-Exception-Code" header and the corresponding HTTP status in the response for an exception.
/// This is a part of what sendExceptionToHTTPClient() does.
void setHTTPResponseStatusAndHeadersForException(
int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, WriteBufferFromHTTPServerResponse * out, LoggerPtr log);
}
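A standalone model of the "was anything sent yet?" check used in sendExceptionToHTTPClient() above: if the output buffer still holds data that never reached the client, it is safe to discard it and replace it with the exception message. The WriteBuffer-like struct is an illustrative stand-in for WriteBufferFromHTTPServerResponse.

#include <cstddef>
#include <string>

/// 'pending' holds data accumulated but not yet flushed; 'bytes_flushed' counts data already sent to the client.
struct OutBuffer
{
    std::string pending;
    size_t bytes_flushed = 0;
};

void writeExceptionToBuffer(OutBuffer & out, const std::string & exception_message)
{
    bool data_sent = out.bytes_flushed != 0;
    if (!data_sent)
        out.pending.clear(); /// nothing reached the client yet, so the partial result can be dropped
    out.pending += exception_message;
    out.pending += '\n';
}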

View File

@ -0,0 +1,24 @@
#include <Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPServerRequest.h>
namespace DB
{
void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method)
{
/// Anything other than HTTP POST should be a read-only query.
if (http_method != HTTPServerRequest::HTTP_POST)
{
/// 'readonly' setting values mean:
/// readonly = 0 - any query is allowed, client can change any setting.
/// readonly = 1 - only readonly queries are allowed, client can't change settings.
/// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'.
if (context->getSettingsRef().readonly == 0)
context->setSetting("readonly", 2);
}
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace DB
{
/// Sets readonly = 2 if the current HTTP method is not HTTP POST and if readonly is not set already.
void setReadOnlyIfHTTPMethodIdempotent(ContextMutablePtr context, const String & http_method);
}
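A standalone sketch of the rule documented above: any HTTP method other than POST is treated as read-only unless 'readonly' was already set explicitly. The Settings struct is an illustrative stand-in for the mutable query Context.

#include <string>

struct Settings { int readonly = 0; };

void setReadOnlyIfIdempotent(Settings & settings, const std::string & http_method)
{
    /// readonly = 2 still allows the client to change any setting except 'readonly' itself.
    if (http_method != "POST" && settings.readonly == 0)
        settings.readonly = 2;
}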

View File

@ -1,11 +1,6 @@
#include <Server/HTTPHandler.h>
#include <Access/Authentication.h>
#include <Access/Credentials.h>
#include <Access/AccessControl.h>
#include <Access/ExternalAuthenticators.h>
#include <Access/Role.h>
#include <Access/User.h>
#include <Compression/CompressedReadBuffer.h>
#include <Compression/CompressedWriteBuffer.h>
#include <Core/ExternalTable.h>
@ -37,20 +32,14 @@
#include <base/getFQDNOrHostName.h>
#include <base/scope_guard.h>
#include <Server/HTTP/HTTPResponse.h>
#include <Server/HTTP/authenticateUserByHTTP.h>
#include <Server/HTTP/sendExceptionToHTTPClient.h>
#include <Server/HTTP/setReadOnlyIfHTTPMethodIdempotent.h>
#include <boost/container/flat_set.hpp>
#include <Access/Common/SSLCertificateSubjects.h>
#include "config.h"
#include <Poco/Base64Decoder.h>
#include <Poco/Base64Encoder.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPMessage.h>
#include <Poco/Net/HTTPStream.h>
#include <Poco/MemoryStream.h>
#include <Poco/StreamCopier.h>
#include <Poco/String.h>
#include <Poco/Net/SocketAddress.h>
#include "config.h"
#include <algorithm>
#include <chrono>
@ -60,78 +49,19 @@
#include <unordered_map>
#include <utility>
#if USE_SSL
#include <Poco/Net/X509Certificate.h>
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_COMPILE_REGEXP;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_PARSE_TEXT;
extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
extern const int CANNOT_PARSE_QUOTED_STRING;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_PARSE_DATETIME;
extern const int CANNOT_PARSE_NUMBER;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
extern const int CANNOT_PARSE_IPV4;
extern const int CANNOT_PARSE_IPV6;
extern const int CANNOT_PARSE_UUID;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_SCHEDULE_TASK;
extern const int DUPLICATE_COLUMN;
extern const int ILLEGAL_COLUMN;
extern const int THERE_IS_NO_COLUMN;
extern const int UNKNOWN_ELEMENT_IN_AST;
extern const int UNKNOWN_TYPE_OF_AST_NODE;
extern const int TOO_DEEP_AST;
extern const int TOO_BIG_AST;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int SYNTAX_ERROR;
extern const int INCORRECT_DATA;
extern const int TYPE_MISMATCH;
extern const int UNKNOWN_TABLE;
extern const int UNKNOWN_FUNCTION;
extern const int UNKNOWN_IDENTIFIER;
extern const int UNKNOWN_TYPE;
extern const int UNKNOWN_STORAGE;
extern const int UNKNOWN_DATABASE;
extern const int UNKNOWN_SETTING;
extern const int UNKNOWN_DIRECTION_OF_SORTING;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int UNKNOWN_FORMAT;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int UNKNOWN_TYPE_OF_QUERY;
extern const int UNKNOWN_ROLE;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int QUERY_IS_TOO_LARGE;
extern const int NOT_IMPLEMENTED;
extern const int SOCKET_TIMEOUT;
extern const int UNKNOWN_USER;
extern const int WRONG_PASSWORD;
extern const int REQUIRED_PASSWORD;
extern const int AUTHENTICATION_FAILED;
extern const int SET_NON_GRANTED_ROLE;
extern const int INVALID_SESSION_TIMEOUT;
extern const int HTTP_LENGTH_REQUIRED;
extern const int SUPPORT_IS_DISABLED;
extern const int TIMEOUT_EXCEEDED;
}
namespace
@ -173,115 +103,6 @@ void processOptionsRequest(HTTPServerResponse & response, const Poco::Util::Laye
}
}
static String base64Decode(const String & encoded)
{
String decoded;
Poco::MemoryInputStream istr(encoded.data(), encoded.size());
Poco::Base64Decoder decoder(istr);
Poco::StreamCopier::copyToString(decoder, decoded);
return decoded;
}
static String base64Encode(const String & decoded)
{
std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
ostr.exceptions(std::ios::failbit);
Poco::Base64Encoder encoder(ostr);
encoder.rdbuf()->setLineLength(0);
encoder << decoded;
encoder.close();
return ostr.str();
}
static Poco::Net::HTTPResponse::HTTPStatus exceptionCodeToHTTPStatus(int exception_code)
{
using namespace Poco::Net;
if (exception_code == ErrorCodes::REQUIRED_PASSWORD)
{
return HTTPResponse::HTTP_UNAUTHORIZED;
}
else if (exception_code == ErrorCodes::UNKNOWN_USER ||
exception_code == ErrorCodes::WRONG_PASSWORD ||
exception_code == ErrorCodes::AUTHENTICATION_FAILED ||
exception_code == ErrorCodes::SET_NON_GRANTED_ROLE)
{
return HTTPResponse::HTTP_FORBIDDEN;
}
else if (exception_code == ErrorCodes::BAD_ARGUMENTS ||
exception_code == ErrorCodes::CANNOT_COMPILE_REGEXP ||
exception_code == ErrorCodes::CANNOT_PARSE_TEXT ||
exception_code == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE ||
exception_code == ErrorCodes::CANNOT_PARSE_QUOTED_STRING ||
exception_code == ErrorCodes::CANNOT_PARSE_DATE ||
exception_code == ErrorCodes::CANNOT_PARSE_DATETIME ||
exception_code == ErrorCodes::CANNOT_PARSE_NUMBER ||
exception_code == ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING ||
exception_code == ErrorCodes::CANNOT_PARSE_IPV4 ||
exception_code == ErrorCodes::CANNOT_PARSE_IPV6 ||
exception_code == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED ||
exception_code == ErrorCodes::CANNOT_PARSE_UUID ||
exception_code == ErrorCodes::DUPLICATE_COLUMN ||
exception_code == ErrorCodes::ILLEGAL_COLUMN ||
exception_code == ErrorCodes::UNKNOWN_ELEMENT_IN_AST ||
exception_code == ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE ||
exception_code == ErrorCodes::THERE_IS_NO_COLUMN ||
exception_code == ErrorCodes::TOO_DEEP_AST ||
exception_code == ErrorCodes::TOO_BIG_AST ||
exception_code == ErrorCodes::UNEXPECTED_AST_STRUCTURE ||
exception_code == ErrorCodes::SYNTAX_ERROR ||
exception_code == ErrorCodes::INCORRECT_DATA ||
exception_code == ErrorCodes::TYPE_MISMATCH ||
exception_code == ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE)
{
return HTTPResponse::HTTP_BAD_REQUEST;
}
else if (exception_code == ErrorCodes::UNKNOWN_TABLE ||
exception_code == ErrorCodes::UNKNOWN_FUNCTION ||
exception_code == ErrorCodes::UNKNOWN_IDENTIFIER ||
exception_code == ErrorCodes::UNKNOWN_TYPE ||
exception_code == ErrorCodes::UNKNOWN_STORAGE ||
exception_code == ErrorCodes::UNKNOWN_DATABASE ||
exception_code == ErrorCodes::UNKNOWN_SETTING ||
exception_code == ErrorCodes::UNKNOWN_DIRECTION_OF_SORTING ||
exception_code == ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION ||
exception_code == ErrorCodes::UNKNOWN_FORMAT ||
exception_code == ErrorCodes::UNKNOWN_DATABASE_ENGINE ||
exception_code == ErrorCodes::UNKNOWN_TYPE_OF_QUERY ||
exception_code == ErrorCodes::UNKNOWN_ROLE)
{
return HTTPResponse::HTTP_NOT_FOUND;
}
else if (exception_code == ErrorCodes::QUERY_IS_TOO_LARGE)
{
return HTTPResponse::HTTP_REQUESTENTITYTOOLARGE;
}
else if (exception_code == ErrorCodes::NOT_IMPLEMENTED)
{
return HTTPResponse::HTTP_NOT_IMPLEMENTED;
}
else if (exception_code == ErrorCodes::SOCKET_TIMEOUT ||
exception_code == ErrorCodes::CANNOT_OPEN_FILE)
{
return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;
}
else if (exception_code == ErrorCodes::HTTP_LENGTH_REQUIRED)
{
return HTTPResponse::HTTP_LENGTH_REQUIRED;
}
else if (exception_code == ErrorCodes::TIMEOUT_EXCEEDED)
{
return HTTPResponse::HTTP_REQUEST_TIMEOUT;
}
else if (exception_code == ErrorCodes::CANNOT_SCHEDULE_TASK)
{
return HTTPResponse::HTTP_SERVICE_UNAVAILABLE;
}
return HTTPResponse::HTTP_INTERNAL_SERVER_ERROR;
}
static std::chrono::steady_clock::duration parseSessionTimeout(
const Poco::Util::AbstractConfiguration & config,
const HTMLForm & params)
@ -358,204 +179,9 @@ HTTPHandler::HTTPHandler(IServer & server_, const std::string & name, const HTTP
HTTPHandler::~HTTPHandler() = default;
bool HTTPHandler::authenticateUser(
HTTPServerRequest & request,
HTMLForm & params,
HTTPServerResponse & response)
bool HTTPHandler::authenticateUser(HTTPServerRequest & request, HTMLForm & params, HTTPServerResponse & response)
{
using namespace Poco::Net;
/// The user and password can be passed by headers (similar to X-Auth-*),
/// which is used by load balancers to pass authentication information.
std::string user = request.get("X-ClickHouse-User", "");
std::string password = request.get("X-ClickHouse-Key", "");
std::string quota_key = request.get("X-ClickHouse-Quota", "");
/// The header 'X-ClickHouse-SSL-Certificate-Auth: on' enables checking the common name
/// extracted from the SSL certificate used for this connection instead of checking password.
bool has_ssl_certificate_auth = (request.get("X-ClickHouse-SSL-Certificate-Auth", "") == "on");
bool has_auth_headers = !user.empty() || !password.empty() || has_ssl_certificate_auth;
/// User name and password can be passed using HTTP Basic auth or query parameters
/// (both methods are insecure).
bool has_http_credentials = request.hasCredentials();
bool has_credentials_in_query_params = params.has("user") || params.has("password");
std::string spnego_challenge;
SSLCertificateSubjects certificate_subjects;
if (has_auth_headers)
{
/// It is prohibited to mix different authorization schemes.
if (has_http_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and Authorization HTTP header simultaneously");
if (has_credentials_in_query_params)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and authentication via parameters simultaneously");
if (has_ssl_certificate_auth)
{
#if USE_SSL
if (!password.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use SSL certificate authentication and authentication via password simultaneously");
if (request.havePeerCertificate())
certificate_subjects = extractSSLCertificateSubjects(request.peerCertificate());
if (certificate_subjects.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name or Subject Alternative Name");
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"SSL certificate authentication disabled because ClickHouse was built without SSL library");
#endif
}
}
else if (has_http_credentials)
{
/// It is prohibited to mix different authorization schemes.
if (has_credentials_in_query_params)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
"Invalid authentication: it is not allowed "
"to use Authorization HTTP header and authentication via parameters simultaneously");
std::string scheme;
std::string auth_info;
request.getCredentials(scheme, auth_info);
if (Poco::icompare(scheme, "Basic") == 0)
{
HTTPBasicCredentials credentials(auth_info);
user = credentials.getUsername();
password = credentials.getPassword();
}
else if (Poco::icompare(scheme, "Negotiate") == 0)
{
spnego_challenge = auth_info;
if (spnego_challenge.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty");
}
else
{
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme);
}
}
else
{
/// If the user name is not set we assume it's the 'default' user.
user = params.get("user", "default");
password = params.get("password", "");
}
if (!certificate_subjects.empty())
{
if (!request_credentials)
request_credentials = std::make_unique<SSLCertificateCredentials>(user, std::move(certificate_subjects));
auto * certificate_credentials = dynamic_cast<SSLCertificateCredentials *>(request_credentials.get());
if (!certificate_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme");
}
else if (!spnego_challenge.empty())
{
if (!request_credentials)
request_credentials = server.context()->makeGSSAcceptorContext();
auto * gss_acceptor_context = dynamic_cast<GSSAcceptorContext *>(request_credentials.get());
if (!gss_acceptor_context)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected");
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunreachable-code"
const auto spnego_response = base64Encode(gss_acceptor_context->processToken(base64Decode(spnego_challenge), log));
#pragma clang diagnostic pop
if (!spnego_response.empty())
response.set("WWW-Authenticate", "Negotiate " + spnego_response);
if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady())
{
if (spnego_response.empty())
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
}
else // I.e., now using user name and password strings ("Basic").
{
if (!request_credentials)
request_credentials = std::make_unique<BasicCredentials>();
auto * basic_credentials = dynamic_cast<BasicCredentials *>(request_credentials.get());
if (!basic_credentials)
throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme");
basic_credentials->setUserName(user);
basic_credentials->setPassword(password);
}
if (params.has("quota_key"))
{
if (!quota_key.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Invalid authentication: it is not allowed "
"to use quota key as HTTP header and as parameter simultaneously");
quota_key = params.get("quota_key");
}
/// Set client info. It will be used for quota accounting parameters in 'setUser' method.
session->setHTTPClientInfo(request);
session->setQuotaClientKey(quota_key);
/// Extract the last entry from comma separated list of forwarded_for addresses.
/// Only the last proxy can be trusted (if any).
String forwarded_address = session->getClientInfo().getLastForwardedFor();
try
{
if (!forwarded_address.empty() && server.config().getBool("auth_use_forwarded_address", false))
session->authenticate(*request_credentials, Poco::Net::SocketAddress(forwarded_address, request.clientAddress().port()));
else
session->authenticate(*request_credentials, request.clientAddress());
}
catch (const Authentication::Require<BasicCredentials> & required_credentials)
{
request_credentials = std::make_unique<BasicCredentials>();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Basic");
else
response.set("WWW-Authenticate", "Basic realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
catch (const Authentication::Require<GSSAcceptorContext> & required_credentials)
{
request_credentials = server.context()->makeGSSAcceptorContext();
if (required_credentials.getRealm().empty())
response.set("WWW-Authenticate", "Negotiate");
else
response.set("WWW-Authenticate", "Negotiate realm=\"" + required_credentials.getRealm() + "\"");
response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED);
response.send();
return false;
}
request_credentials.reset();
return true;
return authenticateUserByHTTP(request, params, response, *session, request_credentials, server.context(), log);
}
@ -727,10 +353,22 @@ void HTTPHandler::processQuery(
std::unique_ptr<ReadBuffer> in;
static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role",
"buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session"};
auto roles = params.getAll("role");
if (!roles.empty())
context->setCurrentRoles(roles);
Names reserved_param_suffixes;
std::string database = request.get("X-ClickHouse-Database", params.get("database", ""));
if (!database.empty())
context->setCurrentDatabase(database);
std::string default_format = request.get("X-ClickHouse-Format", params.get("default_format", ""));
if (!default_format.empty())
context->setDefaultFormat(default_format);
/// Anything other than HTTP POST should be a read-only query.
setReadOnlyIfHTTPMethodIdempotent(context, request.getMethod());
bool has_external_data = startsWith(request.getContentType(), "multipart/form-data");
auto param_could_be_skipped = [&] (const String & name)
{
@ -738,87 +376,36 @@ void HTTPHandler::processQuery(
if (name.empty())
return true;
/// Some parameters (database, default_format, everything used in the code above) do not
/// belong to the Settings class.
static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace", "role",
"buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version", "close_session",
"database", "default_format"};
if (reserved_param_names.contains(name))
return true;
for (const String & suffix : reserved_param_suffixes)
/// For external data we also want settings.
if (has_external_data)
{
if (endsWith(name, suffix))
return true;
/// Skip unneeded parameters to avoid confusing them later with context settings or query parameters.
/// There is a known ambiguity (arguably a bug) between formats and settings such as `date_time_input_format` and `low_cardinality_allow_in_native_format`.
static const Names reserved_param_suffixes = {"_format", "_types", "_structure"};
for (const String & suffix : reserved_param_suffixes)
{
if (endsWith(name, suffix))
return true;
}
}
return false;
};
auto roles = params.getAll("role");
if (!roles.empty())
{
const auto & access_control = context->getAccessControl();
const auto & user = context->getUser();
std::vector<UUID> roles_ids(roles.size());
for (size_t i = 0; i < roles.size(); i++)
{
auto role_id = access_control.getID<Role>(roles[i]);
if (user->granted_roles.isGranted(role_id))
roles_ids[i] = role_id;
else
throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role {} should be granted to set as a current", roles[i].get());
}
context->setCurrentRoles(roles_ids);
}
/// Settings can be overridden in the query.
/// Some parameters (database, default_format, everything used in the code above) do not
/// belong to the Settings class.
/// 'readonly' setting values mean:
/// readonly = 0 - any query is allowed, client can change any setting.
/// readonly = 1 - only readonly queries are allowed, client can't change settings.
/// readonly = 2 - only readonly queries are allowed, client can change any setting except 'readonly'.
/// In theory if initially readonly = 0, the client can change any setting and then set readonly
/// to some other value.
const auto & settings = context->getSettingsRef();
/// Anything else beside HTTP POST should be readonly queries.
if (request.getMethod() != HTTPServerRequest::HTTP_POST)
{
if (settings.readonly == 0)
context->setSetting("readonly", 2);
}
bool has_external_data = startsWith(request.getContentType(), "multipart/form-data");
if (has_external_data)
{
/// Skip unneeded parameters to avoid confusing them later with context settings or query parameters.
reserved_param_suffixes.reserve(3);
/// It is a bug and ambiguity with `date_time_input_format` and `low_cardinality_allow_in_native_format` formats/settings.
reserved_param_suffixes.emplace_back("_format");
reserved_param_suffixes.emplace_back("_types");
reserved_param_suffixes.emplace_back("_structure");
}
std::string database = request.get("X-ClickHouse-Database", "");
std::string default_format = request.get("X-ClickHouse-Format", "");
SettingsChanges settings_changes;
for (const auto & [key, value] : params)
{
if (key == "database")
{
if (database.empty())
database = value;
}
else if (key == "default_format")
{
if (default_format.empty())
default_format = value;
}
else if (param_could_be_skipped(key))
{
}
else
if (!param_could_be_skipped(key))
{
/// Parameters other than query parameters are treated as settings.
if (!customizeQueryParam(context, key, value))
@ -826,15 +413,9 @@ void HTTPHandler::processQuery(
}
}
if (!database.empty())
context->setCurrentDatabase(database);
if (!default_format.empty())
context->setDefaultFormat(default_format);
/// For external data we also want settings
context->checkSettingsConstraints(settings_changes, SettingSource::QUERY);
context->applySettingsChanges(settings_changes);
const auto & settings = context->getSettingsRef();
/// Set the query id supplied by the user, if any, and also update the OpenTelemetry fields.
context->setCurrentQueryId(params.get("query_id", request.get("X-ClickHouse-Query-Id", "")));
@ -936,7 +517,7 @@ void HTTPHandler::processQuery(
{
bool with_stacktrace = (params.getParsed<bool>("stacktrace", false) && server.config().getBool("enable_http_stacktrace", true));
ExecutionStatus status = ExecutionStatus::fromCurrentException("", with_stacktrace);
formatExceptionForClient(status.code, request, response, used_output);
setHTTPResponseStatusAndHeadersForException(status.code, request, response, used_output.out_holder.get(), log);
current_output_format.setException(status.message);
current_output_format.finalize();
used_output.exception_is_written = true;
@ -970,7 +551,7 @@ void HTTPHandler::trySendExceptionToClient(
const std::string & s, int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
try
{
formatExceptionForClient(exception_code, request, response, used_output);
setHTTPResponseStatusAndHeadersForException(exception_code, request, response, used_output.out_holder.get(), log);
if (!used_output.out_holder && !used_output.exception_is_written)
{
@ -1032,38 +613,6 @@ catch (...)
used_output.cancel();
}
void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest & request, HTTPServerResponse & response, Output & used_output)
{
if (used_output.out_holder)
used_output.out_holder->setExceptionCode(exception_code);
else
response.set("X-ClickHouse-Exception-Code", toString<int>(exception_code));
/// FIXME: make sure that no one else is reading from the same stream at the moment.
/// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body
/// to avoid reading part of the current request body in the next request.
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive()
&& exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
{
try
{
if (!request.getStream().eof())
request.getStream().ignoreAll();
}
catch (...)
{
tryLogCurrentException(log, "Cannot read remaining request body during exception handling");
response.setKeepAlive(false);
}
}
if (exception_code == ErrorCodes::REQUIRED_PASSWORD)
response.requireAuthentication("ClickHouse server HTTP API");
else
response.setStatusAndReason(exceptionCodeToHTTPStatus(exception_code));
}
void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event)
{
setThreadName("HTTPHandler");

View File

@ -173,12 +173,6 @@ private:
HTTPServerResponse & response,
Output & used_output);
void formatExceptionForClient(
int exception_code,
HTTPServerRequest & request,
HTTPServerResponse & response,
Output & used_output);
static void pushDelayedResults(Output & used_output);
};

View File

@ -7077,6 +7077,20 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
/// With the new analyzer, the Planner makes the decision regarding parallel replicas usage, and thus about the processing stage for reading.
if (!query_context->getSettingsRef().allow_experimental_analyzer)
{
const auto & settings = query_context->getSettingsRef();
if (query_context->canUseParallelReplicasCustomKey())
{
if (query_context->getClientInfo().distributed_depth > 0)
return QueryProcessingStage::FetchColumns;
if (!supportsReplication() && !settings.parallel_replicas_for_non_replicated_merge_tree)
return QueryProcessingStage::Enum::FetchColumns;
if (to_stage >= QueryProcessingStage::WithMergeableState
&& query_context->canUseParallelReplicasCustomKeyForCluster(*query_context->getClusterForParallelReplicas()))
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
}
if (query_context->getClientInfo().collaborate_with_initiator)
return QueryProcessingStage::Enum::FetchColumns;
@ -7088,7 +7102,7 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
return QueryProcessingStage::Enum::WithMergeableState;
/// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled
if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
if (settings.parallel_replicas_for_non_replicated_merge_tree)
return QueryProcessingStage::Enum::WithMergeableState;
}
}
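A standalone sketch of the decision ladder added above for parallel replicas with a custom key on the old-analyzer path. The enum and the boolean parameters are illustrative stand-ins for the Context/Settings checks in MergeTreeData::getQueryProcessingStage(); returning std::nullopt means "fall through to the remaining checks".

#include <optional>

enum class Stage { FetchColumns, WithMergeableState, WithMergeableStateAfterAggregationAndLimit };

std::optional<Stage> chooseStageForCustomKey(
    bool is_secondary_query,                              /// distributed_depth > 0
    bool storage_supports_replication,
    bool parallel_replicas_for_non_replicated_merge_tree,
    bool cluster_supports_custom_key,
    Stage to_stage)
{
    if (is_secondary_query)
        return Stage::FetchColumns;
    if (!storage_supports_replication && !parallel_replicas_for_non_replicated_merge_tree)
        return Stage::FetchColumns;
    if (to_stage >= Stage::WithMergeableState && cluster_supports_custom_key)
        return Stage::WithMergeableStateAfterAggregationAndLimit;
    return std::nullopt;
}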

View File

@ -93,6 +93,7 @@ struct MergedBlockOutputStream::Finalizer::Impl
void MergedBlockOutputStream::Finalizer::finish()
{
std::unique_ptr<Impl> to_finish = std::move(impl);
impl.reset();
if (to_finish)
to_finish->finish();
}
@ -130,7 +131,19 @@ MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default;
MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default;
MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr<Impl> impl_) : impl(std::move(impl_)) {}
MergedBlockOutputStream::Finalizer::~Finalizer() = default;
MergedBlockOutputStream::Finalizer::~Finalizer()
{
try
{
if (impl)
finish();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
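A standalone sketch of the pattern introduced above: a finalizer object whose destructor runs finish() if the caller forgot to, and never lets an exception escape the destructor. The class and the std::function callback are illustrative; the real Finalizer holds a pimpl and logs via tryLogCurrentException.

#include <functional>
#include <iostream>

class Finalizer
{
public:
    explicit Finalizer(std::function<void()> finish_callback) : finish_fn(std::move(finish_callback)) {}

    void finish()
    {
        auto to_finish = std::move(finish_fn);
        finish_fn = nullptr;
        if (to_finish)
            to_finish();
    }

    ~Finalizer()
    {
        try
        {
            if (finish_fn)
                finish();
        }
        catch (...)
        {
            /// A destructor must not throw; the real code logs the exception instead.
            std::cerr << "Finalizer::finish() failed in destructor\n";
        }
    }

private:
    std::function<void()> finish_fn;
};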
void MergedBlockOutputStream::finalizePart(
const MergeTreeMutableDataPartPtr & new_part,

View File

@ -426,7 +426,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
query_info.cluster = cluster;
if (!local_context->canUseParallelReplicasCustomKey(*cluster))
if (!local_context->canUseParallelReplicasCustomKeyForCluster(*cluster))
{
if (nodes > 1 && settings.optimize_skip_unused_shards)
{
@ -839,7 +839,9 @@ void StorageDistributed::read(
SelectQueryInfo modified_query_info = query_info;
if (local_context->getSettingsRef().allow_experimental_analyzer)
const auto & settings = local_context->getSettingsRef();
if (settings.allow_experimental_analyzer)
{
StorageID remote_storage_id = StorageID::createEmpty();
if (!remote_table_function_ptr)
@ -864,7 +866,7 @@ void StorageDistributed::read(
header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
if (!local_context->getSettingsRef().allow_experimental_analyzer)
if (!settings.allow_experimental_analyzer)
{
modified_query_info.query = ClusterProxy::rewriteSelectQuery(
local_context, modified_query_info.query,
@ -874,7 +876,7 @@ void StorageDistributed::read(
/// Return directly (with correct header) if no shard to query.
if (modified_query_info.getCluster()->getShardsInfo().empty())
{
if (local_context->getSettingsRef().allow_experimental_analyzer)
if (settings.allow_experimental_analyzer)
return;
Pipe pipe(std::make_shared<NullSource>(header));
@ -893,27 +895,8 @@ void StorageDistributed::read(
storage_snapshot,
processed_stage);
const auto & settings = local_context->getSettingsRef();
ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator;
if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster()))
{
if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context))
{
additional_shard_filter_generator =
[my_custom_key_ast = std::move(custom_key_ast),
column_description = this->getInMemoryMetadataPtr()->columns,
custom_key_type = settings.parallel_replicas_custom_key_filter_type.value,
custom_key_range_lower = settings.parallel_replicas_custom_key_range_lower.value,
custom_key_range_upper = settings.parallel_replicas_custom_key_range_upper.value,
context = local_context,
replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr
{
return getCustomKeyFilterForParallelReplica(
replica_count, replica_num - 1, my_custom_key_ast, {custom_key_type, custom_key_range_lower, custom_key_range_upper}, column_description, context);
};
}
}
auto shard_filter_generator = ClusterProxy::getShardFilterGeneratorForCustomKey(
*modified_query_info.getCluster(), local_context, getInMemoryMetadataPtr()->columns);
ClusterProxy::executeQuery(
query_plan,
@ -928,7 +911,7 @@ void StorageDistributed::read(
sharding_key_expr,
sharding_key_column_name,
distributed_settings,
additional_shard_filter_generator,
shard_filter_generator,
/* is_remote_function= */ static_cast<bool>(owned_cluster));
/// This is a bug, it is possible only when there is no shards to query, and this is handled earlier.

View File

@ -1,51 +1,51 @@
#include "StorageMergeTree.h"
#include "Core/QueryProcessingStage.h"
#include "Storages/MergeTree/IMergeTreeDataPart.h"
#include <Storages/StorageMergeTree.h>
#include <optional>
#include <ranges>
#include <Poco/Timestamp.h>
#include <base/sort.h>
#include <Backups/BackupEntriesCollector.h>
#include <Core/QueryProcessingStage.h>
#include <Databases/IDatabase.h>
#include "Common/Exception.h"
#include <Common/MemoryTracker.h>
#include <Common/escapeForFileName.h>
#include <Common/ProfileEventsScope.h>
#include <Common/typeid_cast.h>
#include <Common/ThreadPool.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/MutationsInterpreter.h>
#include <Interpreters/Context.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <IO/copyData.h>
#include <Interpreters/ClusterProxy/SelectStreamFactory.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/Context.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/MutationsInterpreter.h>
#include <Interpreters/PartLog.h>
#include <Interpreters/TransactionLog.h>
#include <Parsers/ASTCheckQuery.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTPartition.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/formatAST.h>
#include <Parsers/queryToString.h>
#include <Planner/Utils.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/AlterCommands.h>
#include <Storages/PartitionCommands.h>
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <QueryPipeline/Pipe.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <QueryPipeline/Pipe.h>
#include <Storages/AlterCommands.h>
#include <Storages/MergeTree/ActiveDataPartSet.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeList.h>
#include <Storages/MergeTree/MergePlainMergeTreeTask.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeSink.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Storages/PartitionCommands.h>
#include <Storages/buildQueryTreeForShard.h>
#include <base/sort.h>
#include <fmt/core.h>
#include <Poco/Timestamp.h>
#include <Common/Exception.h>
#include <Common/MemoryTracker.h>
#include <Common/ProfileEventsScope.h>
#include <Common/ThreadPool.h>
#include <Common/escapeForFileName.h>
#include <Common/typeid_cast.h>
namespace DB
@ -220,24 +220,50 @@ void StorageMergeTree::read(
{
ClusterProxy::executeQueryWithParallelReplicas(
query_plan, getStorageID(), processed_stage, query_info.query, local_context, query_info.storage_limits);
return;
}
else
{
const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower()
&& local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree
&& (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower);
if (auto plan = reader.read(
column_names,
if (local_context->canUseParallelReplicasCustomKey() && settings.parallel_replicas_for_non_replicated_merge_tree
&& !settings.allow_experimental_analyzer && local_context->getClientInfo().distributed_depth == 0)
{
if (auto cluster = local_context->getClusterForParallelReplicas();
local_context->canUseParallelReplicasCustomKeyForCluster(*cluster))
{
auto modified_query_info = query_info;
modified_query_info.cluster = std::move(cluster);
ClusterProxy::executeQueryWithParallelReplicasCustomKey(
query_plan,
getStorageID(),
std::move(modified_query_info),
getInMemoryMetadataPtr()->getColumns(),
storage_snapshot,
query_info,
local_context,
max_block_size,
num_streams,
nullptr,
enable_parallel_reading))
query_plan = std::move(*plan);
processed_stage,
query_info.query,
local_context);
return;
}
else
LOG_WARNING(
log,
"Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has "
"multiple shards",
cluster->getName());
}
const bool enable_parallel_reading = local_context->canUseParallelReplicasOnFollower()
&& local_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree
&& (!local_context->getSettingsRef().allow_experimental_analyzer || query_info.analyzer_can_use_parallel_replicas_on_follower);
if (auto plan = reader.read(
column_names,
storage_snapshot,
query_info,
local_context,
max_block_size,
num_streams,
nullptr,
enable_parallel_reading))
query_plan = std::move(*plan);
}
std::optional<UInt64> StorageMergeTree::totalRows(const Settings &) const
@ -1551,6 +1577,12 @@ bool StorageMergeTree::optimize(
{
assertNotReadonly();
if (deduplicate && getInMemoryMetadataPtr()->hasProjections())
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. "
"User should drop all the projections manually before running the query",
getStorageID().getTableName());
if (deduplicate)
{
if (deduplicate_by_columns.empty())

View File

@ -5460,13 +5460,45 @@ void StorageReplicatedMergeTree::read(
/// 2. Do not read parts that have not yet been written to the quorum of the replicas.
/// For this you have to synchronously go to ZooKeeper.
if (settings.select_sequential_consistency)
{
readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams);
return;
}
/// reading step for parallel replicas with new analyzer is built in Planner, so don't do it here
else if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer)
if (local_context->canUseParallelReplicasOnInitiator() && !settings.allow_experimental_analyzer)
{
readParallelReplicasImpl(query_plan, column_names, query_info, local_context, processed_stage);
else
readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams);
}
return;
}
if (local_context->canUseParallelReplicasCustomKey() && !settings.allow_experimental_analyzer
&& local_context->getClientInfo().distributed_depth == 0)
{
if (auto cluster = local_context->getClusterForParallelReplicas();
local_context->canUseParallelReplicasCustomKeyForCluster(*cluster))
{
auto modified_query_info = query_info;
modified_query_info.cluster = std::move(cluster);
ClusterProxy::executeQueryWithParallelReplicasCustomKey(
query_plan,
getStorageID(),
std::move(modified_query_info),
getInMemoryMetadataPtr()->getColumns(),
storage_snapshot,
processed_stage,
query_info.query,
local_context);
return;
}
else
LOG_WARNING(
log,
"Parallel replicas with custom key will not be used because cluster defined by 'cluster_for_parallel_replicas' ('{}') has "
"multiple shards",
cluster->getName());
}
readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, max_block_size, num_streams);
}
void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl(
QueryPlan & query_plan,
@ -5746,6 +5778,12 @@ bool StorageReplicatedMergeTree::optimize(
if (!is_leader)
throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader");
if (deduplicate && getInMemoryMetadataPtr()->hasProjections())
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"OPTIMIZE DEDUPLICATE query is not supported for table {} as it has projections. "
"User should drop all the projections manually before running the query",
getStorageID().getTableName());
if (cleanup)
{
if (!getSettings()->allow_experimental_replacing_merge_with_cleanup)

View File

@ -13,26 +13,28 @@ from get_previous_release_tag import (
PACKAGES_DIR = Path("previous_release_package_folder")
def download_packages(release: ReleaseInfo, dest_path: Path = PACKAGES_DIR) -> None:
def download_packages(
release: ReleaseInfo, dest_path: Path = PACKAGES_DIR, debug: bool = False
) -> None:
dest_path.mkdir(parents=True, exist_ok=True)
logging.info("Will download %s", release)
for pkg, url in release.assets.items():
if not pkg.endswith("_amd64.deb") or "-dbg_" in pkg:
if not pkg.endswith("_amd64.deb") or (not debug and "-dbg_" in pkg):
continue
pkg_name = dest_path / pkg
download_build_with_progress(url, pkg_name)
def download_last_release(dest_path: Path) -> None:
def download_last_release(dest_path: Path, debug: bool = False) -> None:
current_release = get_previous_release(None)
if current_release is None:
raise DownloadException("The current release is not found")
download_packages(current_release, dest_path=dest_path)
download_packages(current_release, dest_path=dest_path, debug=debug)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
release = get_release_by_tag(input())
download_packages(release)
download_packages(release, debug=True)

View File

@ -253,7 +253,7 @@ def main():
packages_path.mkdir(parents=True, exist_ok=True)
if validate_bugfix_check:
download_last_release(packages_path)
download_last_release(packages_path, debug=True)
else:
download_all_deb_packages(check_name, reports_path, packages_path)

View File

@ -185,7 +185,7 @@ def main():
build_path.mkdir(parents=True, exist_ok=True)
if validate_bugfix_check:
download_last_release(build_path)
download_last_release(build_path, debug=True)
else:
download_all_deb_packages(check_name, reports_path, build_path)

View File

@ -5,7 +5,10 @@ cluster = ClickHouseCluster(__file__)
nodes = [
cluster.add_instance(
f"n{i}", main_configs=["configs/remote_servers.xml"], with_zookeeper=True
f"n{i}",
main_configs=["configs/remote_servers.xml"],
with_zookeeper=True,
macros={"replica": f"r{i}"},
)
for i in range(1, 5)
]
@ -20,34 +23,17 @@ def start_cluster():
cluster.shutdown()
def create_tables(cluster):
n1 = nodes[0]
n1.query("DROP TABLE IF EXISTS dist_table")
n1.query(f"DROP TABLE IF EXISTS test_table ON CLUSTER {cluster}")
n1.query(
f"CREATE TABLE test_table ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))"
)
n1.query(
f"""
CREATE TABLE dist_table AS test_table
Engine=Distributed(
{cluster},
currentDatabase(),
test_table,
rand()
)
"""
def insert_data(table_name, row_num, all_nodes=False):
query = (
f"INSERT INTO {table_name} SELECT number % 4, number FROM numbers({row_num})"
)
def insert_data(cluster, row_num):
create_tables(cluster)
n1 = nodes[0]
n1.query(
f"INSERT INTO dist_table SELECT number % 4, number FROM numbers({row_num})"
)
n1.query("SYSTEM FLUSH DISTRIBUTED dist_table")
if all_nodes:
for n in nodes:
n.query(query)
else:
n1 = nodes[0]
n1.query(query)
@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"])
@ -56,12 +42,36 @@ def insert_data(cluster, row_num):
"cluster",
["test_multiple_shards_multiple_replicas", "test_single_shard_multiple_replicas"],
)
def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter_type):
def test_parallel_replicas_custom_key_distributed(
start_cluster, cluster, custom_key, filter_type
):
for node in nodes:
node.rotate_logs()
row_num = 1000
insert_data(cluster, row_num)
n1 = nodes[0]
n1.query(f"DROP TABLE IF EXISTS dist_table ON CLUSTER {cluster} SYNC")
n1.query(f"DROP TABLE IF EXISTS test_table_for_dist ON CLUSTER {cluster} SYNC")
n1.query(
f"CREATE TABLE test_table_for_dist ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))"
)
n1.query(
f"""
CREATE TABLE dist_table AS test_table_for_dist
Engine=Distributed(
{cluster},
currentDatabase(),
test_table_for_dist,
rand()
)
"""
)
insert_data("dist_table", row_num)
n1.query("SYSTEM FLUSH DISTRIBUTED dist_table")
expected_result = ""
for i in range(4):
@ -72,10 +82,10 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter
n1.query(
"SELECT key, count() FROM dist_table GROUP BY key ORDER BY key",
settings={
"prefer_localhost_replica": 0,
"max_parallel_replicas": 4,
"parallel_replicas_custom_key": custom_key,
"parallel_replicas_custom_key_filter_type": filter_type,
"prefer_localhost_replica": 0,
},
)
== expected_result
@ -87,3 +97,84 @@ def test_parallel_replicas_custom_key(start_cluster, cluster, custom_key, filter
node.contains_in_log("Processing query on a replica using custom_key")
for node in nodes
)
@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"])
@pytest.mark.parametrize("filter_type", ["default", "range"])
@pytest.mark.parametrize(
"cluster",
["test_single_shard_multiple_replicas"],
)
def test_parallel_replicas_custom_key_mergetree(
start_cluster, cluster, custom_key, filter_type
):
for node in nodes:
node.rotate_logs()
row_num = 1000
n1 = nodes[0]
n1.query(f"DROP TABLE IF EXISTS test_table_for_mt ON CLUSTER {cluster} SYNC")
n1.query(
f"CREATE TABLE test_table_for_mt ON CLUSTER {cluster} (key UInt32, value String) Engine=MergeTree ORDER BY (key, sipHash64(value))"
)
insert_data("test_table_for_mt", row_num, all_nodes=True)
expected_result = ""
for i in range(4):
expected_result += f"{i}\t250\n"
n1 = nodes[0]
assert (
n1.query(
"SELECT key, count() FROM test_table_for_mt GROUP BY key ORDER BY key",
settings={
"max_parallel_replicas": 4,
"parallel_replicas_custom_key": custom_key,
"parallel_replicas_custom_key_filter_type": filter_type,
"parallel_replicas_for_non_replicated_merge_tree": 1,
"cluster_for_parallel_replicas": cluster,
},
)
== expected_result
)
@pytest.mark.parametrize("custom_key", ["sipHash64(key)", "key"])
@pytest.mark.parametrize("filter_type", ["default", "range"])
@pytest.mark.parametrize(
"cluster",
["test_single_shard_multiple_replicas"],
)
def test_parallel_replicas_custom_key_replicatedmergetree(
start_cluster, cluster, custom_key, filter_type
):
for node in nodes:
node.rotate_logs()
row_num = 1000
n1 = nodes[0]
n1.query(f"DROP TABLE IF EXISTS test_table_for_rmt ON CLUSTER {cluster} SYNC")
n1.query(
f"CREATE TABLE test_table_for_rmt ON CLUSTER {cluster} (key UInt32, value String) Engine=ReplicatedMergeTree('/clickhouse/tables', '{{replica}}') ORDER BY (key, sipHash64(value))"
)
insert_data("test_table_for_rmt", row_num, all_nodes=False)
expected_result = ""
for i in range(4):
expected_result += f"{i}\t250\n"
n1 = nodes[0]
assert (
n1.query(
"SELECT key, count() FROM test_table_for_rmt GROUP BY key ORDER BY key",
settings={
"max_parallel_replicas": 4,
"parallel_replicas_custom_key": custom_key,
"parallel_replicas_custom_key_filter_type": filter_type,
"cluster_for_parallel_replicas": cluster,
},
)
== expected_result
)

View File

@ -76,11 +76,11 @@ def test_parallel_replicas_custom_key_failover(
f"SELECT key, count() FROM cluster('{cluster_name}', currentDatabase(), test_table) GROUP BY key ORDER BY key",
settings={
"log_comment": log_comment,
"prefer_localhost_replica": prefer_localhost_replica,
"max_parallel_replicas": 4,
"parallel_replicas_custom_key": custom_key,
"parallel_replicas_custom_key_filter_type": filter_type,
"use_hedged_requests": use_hedged_requests,
"prefer_localhost_replica": prefer_localhost_replica,
# avoid considering replica delay on connection choice
# otherwise connections may not be distributed evenly among the available nodes,
# and neither will the custom key secondary queries (we check this below)
@ -100,20 +100,19 @@ def test_parallel_replicas_custom_key_failover(
assert query_id != ""
query_id = query_id[:-1]
if prefer_localhost_replica == 0:
assert (
node1.query(
f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1"
)
== "subqueries\t4\n"
)
# With hedged requests enabled, we can't guarantee exact query distribution among nodes
# If a replica is slow to respond, a hedged connection can change the initial replica choice
if use_hedged_requests == 0:
assert (
node1.query(
f"SELECT 'subqueries', count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' AND query_id != initial_query_id SETTINGS skip_unavailable_shards=1"
f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1"
)
== "subqueries\t4\n"
== "n1\t3\nn3\t2\n"
)
# With hedged requests enabled, we can't guarantee exact query distribution among nodes
# If a replica is slow to respond, a hedged connection can change the initial replica choice
if use_hedged_requests == 0:
assert (
node1.query(
f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1"
)
== "n1\t3\nn3\t2\n"
)

View File

@ -0,0 +1,12 @@
<!-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement -->
<!-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings -->
<test>
<!-- trivial pattern -->
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers(5000000) FORMAT Null</query>
<!-- non-trivial patterns -->
<!-- deliberately testing with fewer rows to keep runtimes reasonable -->
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
<query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers(500000) FORMAT Null</query>
</test>
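A minimal sketch of the behavior these queries exercise, assuming the trivial needle/replacement shortcut described in the comments above; the expected values match the reference output of the functional test further below:

SELECT
    replaceRegexpOne('Hello', 'l', 'x') AS first_only,   -- replaces only the first match: 'Hexlo'
    replaceRegexpAll('Hello', 'l', 'x') AS all_matches;  -- replaces every match: 'Hexxo'

Since the needle 'l' contains no regex metacharacters, both calls can take the plain string-replacement fallback instead of compiling a regular expression.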

View File

@ -13,4 +13,4 @@ FROM
SELECT
arrayJoin([[10, 4, 3], [7, 5, 6], [8, 8, 2]]) AS num,
arrayJoin([[1, 2, 4]]) AS id
)
);

View File

@ -1,5 +1,8 @@
set allow_deprecated_syntax_for_merge_tree=1;
set max_threads = 1;
set max_insert_threads = 1;
drop table if exists test_ins_arr;
create table test_ins_arr (date Date, val Array(UInt64)) engine = MergeTree(date, (date), 8192);
insert into test_ins_arr select toDate('2017-10-02'), [number, 42] from system.numbers limit 10000;

View File

@ -1,173 +0,0 @@
query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)' with custom_key='sipHash64(x)'
filter_type='default' max_replicas=1 prefer_localhost_replica=0
Hello
filter_type='default' max_replicas=2 prefer_localhost_replica=0
Hello
filter_type='default' max_replicas=3 prefer_localhost_replica=0
Hello
filter_type='range' max_replicas=1 prefer_localhost_replica=0
Hello
filter_type='range' max_replicas=2 prefer_localhost_replica=0
Hello
filter_type='range' max_replicas=3 prefer_localhost_replica=0
Hello
filter_type='default' max_replicas=1 prefer_localhost_replica=1
Hello
filter_type='default' max_replicas=2 prefer_localhost_replica=1
Hello
filter_type='default' max_replicas=3 prefer_localhost_replica=1
Hello
filter_type='range' max_replicas=1 prefer_localhost_replica=1
Hello
filter_type='range' max_replicas=2 prefer_localhost_replica=1
Hello
filter_type='range' max_replicas=3 prefer_localhost_replica=1
Hello
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='y'
filter_type='default' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)'
filter_type='default' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1'
filter_type='default' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=0
0 334
1 333
2 333
filter_type='default' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='default' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=1 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=2 prefer_localhost_replica=1
0 334
1 333
2 333
filter_type='range' max_replicas=3 prefer_localhost_replica=1
0 334
1 333
2 333
1

View File

@ -1,46 +0,0 @@
#!/usr/bin/env bash
# Tags: no-parallel, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function run_with_custom_key {
echo "query='$1' with custom_key='$2'"
for prefer_localhost_replica in 0 1; do
for filter_type in 'default' 'range'; do
for max_replicas in {1..3}; do
echo "filter_type='$filter_type' max_replicas=$max_replicas prefer_localhost_replica=$prefer_localhost_replica"
query="$1 SETTINGS max_parallel_replicas=$max_replicas\
, parallel_replicas_custom_key='$2'\
, parallel_replicas_custom_key_filter_type='$filter_type'\
, prefer_localhost_replica=$prefer_localhost_replica"
$CLICKHOUSE_CLIENT --query="$query"
done
done
done
}
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key";
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String) ENGINE = MergeTree ORDER BY x";
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key VALUES ('Hello')";
run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key)" "sipHash64(x)"
$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key"
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)"
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key SELECT toString(number), number % 3 FROM numbers(1000)"
function run_count_with_custom_key {
run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) GROUP BY y ORDER BY y" "$1"
}
run_count_with_custom_key "y"
run_count_with_custom_key "cityHash64(y)"
run_count_with_custom_key "cityHash64(y) + 1"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with"
$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key"

View File

@ -0,0 +1,177 @@
query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)' with custom_key='sipHash64(x)'
filter_type='default' max_replicas=1
Hello
filter_type='default' max_replicas=2
Hello
filter_type='default' max_replicas=3
Hello
filter_type='range' max_replicas=1
Hello
filter_type='range' max_replicas=2
Hello
filter_type='range' max_replicas=3
Hello
query='SELECT * FROM 02535_custom_key_mt' with custom_key='sipHash64(x)'
filter_type='default' max_replicas=1
Hello
filter_type='default' max_replicas=2
Hello
filter_type='default' max_replicas=3
Hello
filter_type='range' max_replicas=1
Hello
filter_type='range' max_replicas=2
Hello
filter_type='range' max_replicas=3
Hello
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='y'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='y'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y)'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
1

View File

@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Tags: no-parallel, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function run_with_custom_key {
echo "query='$1' with custom_key='$2'"
for filter_type in 'default' 'range'; do
for max_replicas in {1..3}; do
echo "filter_type='$filter_type' max_replicas=$max_replicas"
query="$1 SETTINGS max_parallel_replicas=$max_replicas\
, parallel_replicas_custom_key='$2'\
, parallel_replicas_custom_key_filter_type='$filter_type'\
, parallel_replicas_for_non_replicated_merge_tree=1 \
, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'"
$CLICKHOUSE_CLIENT --query="$query"
done
done
}
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_mt";
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String) ENGINE = MergeTree ORDER BY x";
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt VALUES ('Hello')";
run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt)" "sipHash64(x)"
run_with_custom_key "SELECT * FROM 02535_custom_key_mt" "sipHash64(x)"
$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt"
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_mt (x String, y UInt32) ENGINE = MergeTree ORDER BY cityHash64(x)"
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_mt SELECT toString(number), number % 3 FROM numbers(1000)"
function run_count_with_custom_key_distributed {
run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) GROUP BY y ORDER BY y" "$1"
}
run_count_with_custom_key_distributed "y"
run_count_with_custom_key_distributed "cityHash64(y)"
run_count_with_custom_key_distributed "cityHash64(y) + 1"
function run_count_with_custom_key_merge_tree {
run_with_custom_key "SELECT y, count() FROM 02535_custom_key_mt GROUP BY y ORDER BY y" "$1"
}
run_count_with_custom_key_merge_tree "y"
run_count_with_custom_key_merge_tree "cityHash64(y)"
run_count_with_custom_key_merge_tree "cityHash64(y) + 1"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_mt) as t1 JOIN 02535_custom_key_mt USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with"
$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_mt"
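For orientation, a rough sketch of the per-replica filters these settings imply, based on the documented filter types; the actual predicate is built by getCustomKeyFilterForParallelReplica and may differ in detail. Here t is a stand-in for the table being read (e.g. 02535_custom_key_mt above):

-- Hypothetical illustration: with max_parallel_replicas = 3 and parallel_replicas_custom_key = 'y',
-- filter_type = 'default' roughly gives replica r (0-based) the predicate y % 3 = r, i.e. replica 0 reads
SELECT y, count() FROM t WHERE y % 3 = 0 GROUP BY y ORDER BY y;
-- filter_type = 'range' instead splits the value space of y's type into 3 contiguous ranges,
-- optionally narrowed by parallel_replicas_custom_key_range_lower/upper.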

View File

@ -0,0 +1,177 @@
query='SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)' with custom_key='sipHash64(x)'
filter_type='default' max_replicas=1
Hello
filter_type='default' max_replicas=2
Hello
filter_type='default' max_replicas=3
Hello
filter_type='range' max_replicas=1
Hello
filter_type='range' max_replicas=2
Hello
filter_type='range' max_replicas=3
Hello
query='SELECT * FROM 02535_custom_key_rmt' with custom_key='sipHash64(x)'
filter_type='default' max_replicas=1
Hello
filter_type='default' max_replicas=2
Hello
filter_type='default' max_replicas=3
Hello
filter_type='range' max_replicas=1
Hello
filter_type='range' max_replicas=2
Hello
filter_type='range' max_replicas=3
Hello
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='y'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y)'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='y'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y)'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
query='SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y' with custom_key='cityHash64(y) + 1'
filter_type='default' max_replicas=1
0 334
1 333
2 333
filter_type='default' max_replicas=2
0 334
1 333
2 333
filter_type='default' max_replicas=3
0 334
1 333
2 333
filter_type='range' max_replicas=1
0 334
1 333
2 333
filter_type='range' max_replicas=2
0 334
1 333
2 333
filter_type='range' max_replicas=3
0 334
1 333
2 333
1

View File

@ -0,0 +1,54 @@
#!/usr/bin/env bash
# Tags: no-parallel, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function run_with_custom_key {
echo "query='$1' with custom_key='$2'"
for filter_type in 'default' 'range'; do
for max_replicas in {1..3}; do
echo "filter_type='$filter_type' max_replicas=$max_replicas"
query="$1 SETTINGS max_parallel_replicas=$max_replicas\
, parallel_replicas_custom_key='$2'\
, parallel_replicas_custom_key_filter_type='$filter_type'\
, parallel_replicas_for_non_replicated_merge_tree=1 \
, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'"
$CLICKHOUSE_CLIENT --query="$query"
done
done
}
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt";
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt (x String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535', 'r1') ORDER BY x";
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt VALUES ('Hello')";
run_with_custom_key "SELECT * FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt)" "sipHash64(x)"
run_with_custom_key "SELECT * FROM 02535_custom_key_rmt" "sipHash64(x)"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS 02535_custom_key_rmt_hash";
$CLICKHOUSE_CLIENT --query="CREATE TABLE 02535_custom_key_rmt_hash (x String, y UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/test_02535_hash', 'r1') ORDER BY cityHash64(x)"
$CLICKHOUSE_CLIENT --query="INSERT INTO 02535_custom_key_rmt_hash SELECT toString(number), number % 3 FROM numbers(1000)"
function run_count_with_custom_key {
run_with_custom_key "SELECT y, count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) GROUP BY y ORDER BY y" "$1"
}
run_count_with_custom_key "y"
run_count_with_custom_key "cityHash64(y)"
run_count_with_custom_key "cityHash64(y) + 1"
function run_count_with_custom_key_merge_tree {
run_with_custom_key "SELECT y, count() FROM 02535_custom_key_rmt_hash GROUP BY y ORDER BY y" "$1"
}
run_count_with_custom_key_merge_tree "y"
run_count_with_custom_key_merge_tree "cityHash64(y)"
run_count_with_custom_key_merge_tree "cityHash64(y) + 1"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key_rmt_hash) as t1 JOIN 02535_custom_key_rmt_hash USING y" --allow_repeated_settings --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with"
$CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key_rmt_hash"

View File

@ -1,4 +1,4 @@
-- Tags: no-fasttest
-- This test depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in the Apache Arrow library.
INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION }
INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/athena_partitioned/hits_9.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION }

View File

@ -0,0 +1 @@
Hello l x Hexlo Hexxo

View File

@ -0,0 +1,11 @@
-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement.
-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings
SELECT 'Hello' AS haystack, 'l' AS needle, 'x' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement), replaceRegexpAll(materialize(haystack), needle, replacement);
-- negative tests
-- Even if the fallback is used, invalid substitutions must throw an exception.
SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpOne(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS }
SELECT 'Hello' AS haystack, 'l' AS needle, '\\1' AS replacement, replaceRegexpAll(materialize(haystack), needle, replacement); -- { serverError BAD_ARGUMENTS }

View File

@ -0,0 +1,6 @@
1
0
1
0
0
0

View File

@ -0,0 +1,45 @@
DROP TABLE IF EXISTS t_read_in_order_1;
CREATE TABLE t_read_in_order_1 (id UInt64, v UInt64)
ENGINE = MergeTree ORDER BY id
SETTINGS index_granularity = 1024, index_granularity_bytes = '10M';
INSERT INTO t_read_in_order_1 SELECT number, number FROM numbers(1000000);
SET max_threads = 8;
SET optimize_read_in_order = 1;
SET read_in_order_use_buffering = 1;
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id
) WHERE explain LIKE '%BufferChunks%';
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10
) WHERE explain LIKE '%BufferChunks%';
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10
) WHERE explain LIKE '%BufferChunks%';
SET read_in_order_use_buffering = 0;
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id
) WHERE explain LIKE '%BufferChunks%';
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 ORDER BY id LIMIT 10
) WHERE explain LIKE '%BufferChunks%';
SELECT count() FROM
(
EXPLAIN PIPELINE SELECT * FROM t_read_in_order_1 WHERE v % 10 = 0 ORDER BY id LIMIT 10
) WHERE explain LIKE '%BufferChunks%';
DROP TABLE t_read_in_order_1;

View File

@ -0,0 +1,17 @@
-- Tags: long, no-random-settings, no-tsan, no-asan, no-msan, no-s3-storage
DROP TABLE IF EXISTS t_read_in_order_2;
CREATE TABLE t_read_in_order_2 (id UInt64, v UInt64) ENGINE = MergeTree ORDER BY id;
INSERT INTO t_read_in_order_2 SELECT number, number FROM numbers(10000000);
OPTIMIZE TABLE t_read_in_order_2 FINAL;
SET optimize_read_in_order = 1;
SET max_threads = 4;
SET read_in_order_use_buffering = 1;
SET max_memory_usage = '100M';
SELECT * FROM t_read_in_order_2 ORDER BY id FORMAT Null;
DROP TABLE t_read_in_order_2;

View File

@ -0,0 +1,3 @@
1 one
1 one
1 one

View File

@ -0,0 +1,30 @@
-- https://github.com/ClickHouse/ClickHouse/issues/65548
DROP TABLE IF EXISTS test_projection_deduplicate;
CREATE TABLE test_projection_deduplicate
(
`id` Int32,
`string` String,
PROJECTION test_projection
(
SELECT id
GROUP BY id
)
)
ENGINE = MergeTree
PRIMARY KEY id;
INSERT INTO test_projection_deduplicate VALUES (1, 'one');
INSERT INTO test_projection_deduplicate VALUES (1, 'one');
OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE; -- { serverError NOT_IMPLEMENTED }
SELECT * FROM test_projection_deduplicate;
ALTER TABLE test_projection_deduplicate DROP PROJECTION test_projection;
OPTIMIZE TABLE test_projection_deduplicate DEDUPLICATE;
SELECT * FROM test_projection_deduplicate;
DROP TABLE test_projection_deduplicate;

View File

@ -0,0 +1 @@
1 2024-06-30 20:00:00.000

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
# Tags: no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "drop table if exists test"
$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id"
$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC"
$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'"
$CLICKHOUSE_CLIENT -q "drop table test"

View File

@ -0,0 +1,13 @@
DROP TABLE IF EXISTS t_subcolumns_join;
CREATE TABLE t_subcolumns_join (id UInt64) ENGINE=MergeTree ORDER BY tuple();
INSERT INTO t_subcolumns_join SELECT number as number FROM numbers(10000);
SELECT
count()
FROM (SELECT number FROM numbers(10)) as tbl LEFT JOIN t_subcolumns_join ON number = id
WHERE id is null
SETTINGS allow_experimental_analyzer = 1, optimize_functions_to_subcolumns = 1, join_use_nulls = 1;
DROP TABLE t_subcolumns_join;

View File

@ -0,0 +1,11 @@
str_array Array(String)
1318
5779
1715
6422
5875
1887
3763
4245
4270
758

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
set -e
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
DATA_DIR=$CUR_DIR/data_avro
# See https://github.com/ClickHouse/ClickHouse/issues/60438
$CLICKHOUSE_LOCAL -q "DESC file('$DATA_DIR/negative_block_size_arrays.avro')"
$CLICKHOUSE_LOCAL -q "SELECT arraySum(arrayMap(x -> length(x), str_array)) AS res FROM file('$DATA_DIR/negative_block_size_arrays.avro')"

View File

@ -0,0 +1,24 @@
QUERY id: 0
PROJECTION COLUMNS
(sumIf(toInt64(1), 1)) Tuple(Int64)
PROJECTION
LIST id: 1, nodes: 1
FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Int64)
ARGUMENTS
LIST id: 3, nodes: 1
FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Int64
ARGUMENTS
LIST id: 5, nodes: 2
CONSTANT id: 6, constant_value: Int64_1, constant_value_type: Int64
EXPRESSION
FUNCTION id: 7, function_name: toInt64, function_type: ordinary, result_type: Int64
ARGUMENTS
LIST id: 8, nodes: 1
CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8
CONSTANT id: 10, constant_value: UInt64_1, constant_value_type: UInt8
JOIN TREE
TABLE_FUNCTION id: 11, alias: __table1, table_function_name: numbers
ARGUMENTS
LIST id: 12, nodes: 1
CONSTANT id: 13, constant_value: UInt64_100, constant_value_type: UInt8
SETTINGS optimize_rewrite_sum_if_to_count_if=1

View File

@ -0,0 +1,2 @@
SET allow_experimental_analyzer = 1;
EXPLAIN QUERY TREE SELECT tuple(sumIf(toInt64(1), 1)) FROM numbers(100) settings optimize_rewrite_sum_if_to_count_if=1;

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2758
personal_ws-1.1 en 2942
AArch
ACLs
ALTERs
@ -1658,9 +1658,9 @@ fsync
func
fuzzBits
fuzzJSON
fuzzQuery
fuzzer
fuzzers
fuzzQuery
gRPC
gccMurmurHash
gcem
@ -2000,6 +2000,7 @@ maxMap
maxintersections
maxintersectionsposition
maxmap
minMappedArrays
maxmind
mdadm
meanZTest
@ -2017,6 +2018,7 @@ metrica
metroHash
mfedotov
minMap
minMappedArrays
minSampleSizeContinuous
minSampleSizeConversion
mindsdb
@ -2124,8 +2126,10 @@ noaa
nonNegativeDerivative
noop
normalizeQuery
normalizeQueryKeepNames
normalizeUTF
normalizedQueryHash
normalizedQueryHashKeepNames
notEmpty
notEquals
notILike