Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into hdfs-idisk

This commit is contained in:
kssenii 2021-04-20 08:08:50 +00:00
commit 7d357baa40
140 changed files with 2554 additions and 1621 deletions

View File

@ -25,7 +25,7 @@
#if defined(__PPC__)
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#endif
@ -1266,7 +1266,7 @@ public:
};
#if defined(__PPC__)
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
#endif

View File

@ -1,9 +1,9 @@
# These strings are autochanged from release_lib.sh:
SET(VERSION_REVISION 54450)
SET(VERSION_REVISION 54451)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 5)
SET(VERSION_MINOR 6)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 3827789b3d8fd2021952e57e5110343d26daa1a1)
SET(VERSION_DESCRIBE v21.5.1.1-prestable)
SET(VERSION_STRING 21.5.1.1)
SET(VERSION_GITHASH 96fced4c3cf432fb0b401d2ab01f0c56e5f74a96)
SET(VERSION_DESCRIBE v21.6.1.1-prestable)
SET(VERSION_STRING 21.6.1.1)
# end of autochange

View File

@ -171,6 +171,7 @@ elseif (COMPILER_GCC)
add_cxx_compile_options(-Wtrampolines)
# Obvious
add_cxx_compile_options(-Wunused)
add_cxx_compile_options(-Wundef)
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
add_cxx_compile_options(-Wvector-operation-performance)
# XXX: libstdc++ has some of these for 3way compare

@ -1 +1 @@
Subproject commit 45885c0c8c0807bb9480886d60ca7042000a4c43
Subproject commit f915d35b2de676683493c86c585141a1e1c83334

debian/changelog
View File

@ -1,5 +1,5 @@
clickhouse (21.5.1.1) unstable; urgency=low
clickhouse (21.6.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Fri, 02 Apr 2021 18:34:26 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 20 Apr 2021 01:48:16 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.5.1.*
ARG version=21.6.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.5.1.*
ARG version=21.6.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.5.1.*
ARG version=21.6.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -312,8 +312,6 @@ function run_tests
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
01798_uniq_theta_sketch
01799_long_uniq_theta_sketch
_orc_
arrow
avro

View File

@ -66,7 +66,12 @@ reportStageEnd('parse')
subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
for e in subst_elems:
available_parameters[e.find('name').text] = [v.text for v in e.findall('values/value')]
name = e.find('name').text
values = [v.text for v in e.findall('values/value')]
if not values:
raise Exception(f'No values given for substitution {{{name}}}')
available_parameters[name] = values
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.

View File

@ -90,6 +90,7 @@ The following settings can be specified in configuration file for given endpoint
- `endpoint` — Specifies prefix of an endpoint. Mandatory.
- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. Optional, default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
@ -102,11 +103,13 @@ The following settings can be specified in configuration file for given endpoint
<!-- <access_key_id>ACCESS_KEY_ID</access_key_id> -->
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
<!-- <use_environment_credentials>false</use_environment_credentials> -->
<!-- <use_insecure_imds_request>false</use_insecure_imds_request> -->
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
<!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
</endpoint-name>
</s3>
```
## Usage {#usage-examples}
Suppose we have several files in TSV format with the following URIs in S3:
@ -149,6 +152,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p
CREATE TABLE big_table (name String, value UInt32)
ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV');
```
## See also
- [S3 table function](../../../sql-reference/table-functions/s3.md)

View File

@ -767,6 +767,7 @@ Required parameters:
Optional parameters:
- `use_environment_credentials` — Reads AWS credentials from the environment variables `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.

View File

@ -159,6 +159,9 @@ The fastest way to evaluate a CatBoost model is to compile `libcatboostmodel.<so|dl
<models_config>/home/catboost/models/*_model.xml</models_config>
```
!!! note "Note"
You can change the path to the CatBoost model configuration later without restarting the server.
## 4. Run the Model Inference from SQL {#run-model-inference}
To test the model, run the ClickHouse client: `$ clickhouse client`.
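Inference can then be run with the `modelEvaluate` function. A minimal sketch; the model name `my_model` and the feature columns are hypothetical placeholders for the names in your model configuration:

``` sql
-- Hypothetical model name and feature columns; substitute your own.
SELECT modelEvaluate('my_model', feature_1, feature_2) AS prediction
FROM my_dataset
LIMIT 10;
```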

View File

@ -169,24 +169,21 @@ Features:
### SeekTable {#seektable}
[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. SeekTable is available both as a cloud service and a self-hosted version. SeekTable reports may be embedded into any web-app.
[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. It is available both as a cloud service and a self-hosted version. Reports from SeekTable may be embedded into any web-app.
Features:
- Business-user-friendly report builder.
- Powerful report parameters for SQL filtering and report-specific query customizations.
- Can connect to ClickHouse both with a native TCP/IP endpoint and an HTTP(S) interface (2 different drivers).
- It is possible to use all power of CH SQL dialect in dimensions/measures definitions
- It is possible to use the full power of the ClickHouse SQL dialect in dimension/measure definitions.
- [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation.
- Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore), data models (cubes) / reports configuration is a human-readable XML and can be stored under version control.
- Supports a report development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data model (cube) / report configuration is human-readable XML and can be stored under a version control system.
SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual usage.
[How to configure ClickHouse connection in SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)
### Chadmin {#chadmin}
[Chadmin](https://github.com/bun4uk/chadmin) is a simple UI for visualizing the queries currently running on your ClickHouse cluster, viewing information about them, and killing them if needed.
[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/gui/) <!--hide-->

View File

@ -4,7 +4,9 @@ Contains information about columns in all the tables.
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
The `system.columns` table contains the following columns (the column type is shown in brackets):
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.columns` only in those sessions where they have been created. They are shown with an empty `database` field.
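For example, a minimal sketch (the temporary table name is arbitrary), run within a single `clickhouse-client` session:

``` sql
-- A temporary table is visible only in the session that created it.
CREATE TEMPORARY TABLE t_example (n UInt32);
SELECT database, table, name FROM system.columns WHERE table = 't_example';
-- The `database` field in the result is empty.
```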
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
@ -26,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh
**Example**
```sql
:) select * from system.columns LIMIT 2 FORMAT Vertical;
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```
```text
@ -65,8 +67,6 @@ is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
2 rows in set. Elapsed: 0.002 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) <!--hide-->

View File

@ -1,59 +1,65 @@
# system.tables {#system-tables}
Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
Contains metadata of each table that the server knows about.
This table contains the following columns (the column type is shown in brackets):
[Detached](../../sql-reference/statements/detach.md) tables are not shown in `system.tables`.
- `database` (String) — The name of the database the table is in.
[Temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in `system.tables` only in those sessions where they have been created. They are shown with an empty `database` field and with the `is_temporary` flag switched on.
- `name` (String) — Table name.
Columns:
- `engine` (String) — Table engine name (without parameters).
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in.
- `is_temporary` (UInt8) - Flag that indicates whether the table is temporary.
- `name` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `data_path` (String) - Path to the table data in the file system.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters).
- `metadata_path` (String) - Path to the table metadata in the file system.
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary.
- `metadata_modification_time` (DateTime) - Time of latest modification of the table metadata.
- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system.
- `dependencies_database` (Array(String)) - Database dependencies.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
- `dependencies_table` (Array(String)) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata.
- `create_table_query` (String) - The query that was used to create the table.
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies.
- `engine_full` (String) - Parameters of the table engine.
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `partition_key` (String) - The partition key expression specified in the table.
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table.
- `sorting_key` (String) - The sorting key expression specified in the table.
- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine.
- `primary_key` (String) - The primary key expression specified in the table.
- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table.
- `sampling_key` (String) - The sampling key expression specified in the table.
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table.
- `storage_policy` (String) - The storage policy:
- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table.
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table.
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy:
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- `total_rows` (Nullable(UInt64)) - Total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `Null` (including the underlying `Buffer` table).
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `NULL` (including the underlying `Buffer` table).
- `total_bytes` (Nullable(UInt64)) - Total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `Null` (**does not** include any underlying storage).
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).
- If the table stores data on disk, returns used space on disk (i.e. compressed).
- If the table stores data in memory, returns approximated number of used bytes in memory.
- `lifetime_rows` (Nullable(UInt64)) - Total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` (Nullable(UInt64)) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables).
The `system.tables` table is used in the `SHOW TABLES` query implementation.
**Example**
```sql
:) SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
```
```text
@ -100,8 +106,6 @@ sampling_key:
storage_policy:
total_rows: ᴺᵁᴸᴸ
total_bytes: ᴺᵁᴸᴸ
2 rows in set. Elapsed: 0.004 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) <!--hide-->

View File

@ -38,4 +38,3 @@ We recommend using this function in almost all scenarios.
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -49,4 +49,3 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -23,4 +23,3 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, `
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12)
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -37,4 +37,3 @@ We don't recommend using this function. In most cases, use the [uniq](../../..
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
- [uniqThetaSketch](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch)

View File

@ -1,39 +0,0 @@
---
toc_priority: 195
---
# uniqThetaSketch {#agg_function-uniqthetasketch}
Calculates the approximate number of different argument values, using the [Theta Sketch Framework](https://datasketches.apache.org/docs/Theta/ThetaSketchFramework.html).
``` sql
uniqThetaSketch(x[, ...])
```
**Arguments**
The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types.
**Returned value**
- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number.
**Implementation details**
Function:
- Calculates a hash for all parameters in the aggregate, then uses it in calculations.
- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values.
4096 (2^12) 64-bit sketches are used. The size of the state is about 41 KB.
- The relative error is 3.125% (95% confidence); see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for details.
**See Also**
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)

View File

@ -16,46 +16,60 @@ The following assumptions are made:
## visitParamHas(params, name) {#visitparamhasparams-name}
Checks whether there is a field with the name name.
Checks whether there is a field with the name `name`.
Alias: `simpleJSONHas`.
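Examples (illustrative values based on the description above):

``` sql
visitParamHas('{"abc":"def"}', 'abc') = 1;
visitParamHas('{"abc":"def"}', 'xyz') = 0;
```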
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
Parses UInt64 from the value of the field named name. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn't exist, or it exists but doesn't contain a number, it returns 0.
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field doesn't exist, or it exists but doesn't contain a number, it returns 0.
Alias: `simpleJSONExtractUInt`.
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
The same as for Int64.
Alias: `simpleJSONExtractInt`.
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
The same as for Float64.
Alias: `simpleJSONExtractFloat`.
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
Parses a true/false value. The result is UInt8.
Alias: `simpleJSONExtractBool`.
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
Returns the value of a field, including separators.
Alias: `simpleJSONExtractRaw`.
Examples:
``` sql
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
```
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
Alias: `simpleJSONExtractString`.
Examples:
``` sql
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
visitParamExtractString('{"abc":"hello}', 'abc') = ''
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
visitParamExtractString('{"abc":"hello}', 'abc') = '';
```
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).

View File

@ -74,6 +74,9 @@ Deletes the column with the name `name`. If the `IF EXISTS` clause is specified,
Deletes data from the file system. Since this deletes entire files, the query is completed almost instantly.
!!! warning "Warning"
You can't delete a column if it is referenced by a [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, the query returns an error.
Example:
``` sql
@ -180,7 +183,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
**See Also**
- [REMOVE TTL](ttl.md).

View File

@ -50,15 +50,32 @@ Creates a table with the same result as that of the [table function](../../../sq
### From SELECT query {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
```
Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from SELECT.
Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from `SELECT`. You can also explicitly specify a description of the columns.
In all cases, if `IF NOT EXISTS` is specified, the query won't return an error if the table already exists. In this case, the query won't do anything.
If the table already exists and `IF NOT EXISTS` is specified, the query won't do anything.
There can be other clauses after the `ENGINE` clause in the query. See detailed documentation on how to create tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines).
**Example**
Query:
``` sql
CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
SELECT x, toTypeName(x) FROM t1;
```
Result:
```text
┌─x─┬─toTypeName(x)─┐
│ 1 │ String │
└───┴───────────────┘
```
## NULL Or NOT NULL Modifiers {#null-modifiers}
`NULL` and `NOT NULL` modifiers after the data type in a column definition allow or disallow it to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
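A short sketch (the table name is illustrative):

``` sql
CREATE TABLE null_example (x Int32 NULL, y Int32 NOT NULL) ENGINE = Memory;
-- DESCRIBE shows x as Nullable(Int32) and y as Int32.
DESCRIBE TABLE null_example;
```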

View File

@ -5,13 +5,18 @@ toc_title: OPTIMIZE
# OPTIMIZE Statement {#misc_operations-optimize}
This query tries to initialize an unscheduled merge of data parts for tables.
!!! warning "Warning"
`OPTIMIZE` can't fix the `Too many parts` error.
**Syntax**
``` sql
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
```
This query tries to initialize an unscheduled merge of data parts for tables with a table engine from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family.
The `OPTIMIZE` query is also supported for the [MaterializedView](../../engines/table-engines/special/materializedview.md) and the [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren't supported.
The `OPTIMIZE` query is supported for the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, as well as the [MaterializedView](../../engines/table-engines/special/materializedview.md) and [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines aren't supported.
When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all nodes (if the `replication_alter_partitions_sync` setting is enabled).
@ -21,12 +26,13 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin
- If you specify `DEDUPLICATE`, then completely identical rows (unless a `BY` clause is specified) will be deduplicated (all columns are compared); this makes sense only for the MergeTree engine.
### BY expression {#by-expression}
## BY expression {#by-expression}
If you want to perform deduplication on a custom set of columns rather than on all of them, you can specify a list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explicitly written or implicitly expanded list of columns must include all columns specified in the row ordering expression (both primary and sorting keys) and the partitioning expression (partitioning key).
Note that `*` behaves just like in `SELECT`: `MATERIALIZED` and `ALIAS` columns are not used for expansion.
Also, it is an error to specify an empty list of columns, to write an expression that results in an empty list of columns, or to deduplicate by an ALIAS column.
!!! note "Note"
Notice that `*` behaves just like in `SELECT`: `MATERIALIZED` and `ALIAS` columns are not used for expansion.
Also, it is an error to specify an empty list of columns, to write an expression that results in an empty list of columns, or to deduplicate by an ALIAS column.
``` sql
OPTIMIZE TABLE table DEDUPLICATE; -- the old one
@ -39,9 +45,10 @@ OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT co
OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT (colX, colY);
```
**Example:**
**Examples**
Create a table:
A silly synthetic table.
``` sql
CREATE TABLE example (
primary_key Int32,
@ -56,31 +63,31 @@ PARTITION BY partition_key
ORDER BY (primary_key, secondary_key);
```
The 'old' deduplication: all columns are taken into account, i.e. a row is removed only if all values in all columns are equal to the corresponding values in the previous row.
``` sql
-- The 'old' deduplication: all columns are taken into account, i.e. a row is removed only if all values in all columns are equal to the corresponding values in the previous row.
OPTIMIZE TABLE example FINAL DEDUPLICATE;
```
Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value` columns.
OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
```
Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
``` sql
-- Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `materialized_value`: `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
```
Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
``` sql
-- Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns.
OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
```
Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
``` sql
-- Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns.
OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
```
!!! warning "Warning"
`OPTIMIZE` can't fix the “Too many parts” error.

View File

@ -39,4 +39,20 @@ toc_title: "ClickHouse Cloud Service Providers"
- support for access rights, one-key recovery, multi-layer network protection, and cloud disk encryption;
- full integration with cloud logging systems, databases, and data processing tools;
- a built-in platform for database monitoring and management;
- technical support from database experts.
- technical support from database experts.
## SberCloud {#sbercloud}
[The SberCloud.Advanced cloud platform](https://sbercloud.ru/ru/advanced):
- provides more than 50 high-tech services;
- lets you quickly create and efficiently manage IT infrastructure, applications, and internet services;
- radically minimizes the resources required to run corporate IT systems;
- cuts the time to market for new products severalfold.
SberCloud.Advanced provides [MapReduce Service (MRS)](https://docs.sbercloud.ru/mrs/ug/topics/ug__clickhouse.html), a reliable, secure, and easy-to-use enterprise-grade platform for storing, processing, and analyzing big data. MRS lets you quickly create and manage ClickHouse clusters.
- A ClickHouse instance consists of three ZooKeeper nodes and several ClickHouse nodes. A dedicated replica mode is used to ensure high reliability of the dual data copies.
- MRS offers flexible scaling capabilities for rapid service growth in scenarios where cluster storage capacity or CPU compute resources are insufficient. MRS provides a one-click data balancing tool for expanding the ClickHouse nodes in a cluster. You can define the data balancing mode and time based on service characteristics to ensure service availability.
- MRS uses a high-availability deployment architecture based on Elastic Load Balance (ELB), a service that automatically distributes traffic across multiple back-end nodes. With ELB, data is written to local tables and read from distributed tables on different nodes. This architecture improves cluster fault tolerance and guarantees high availability of applications.

View File

@ -753,7 +753,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
Optional parameters:
- `use_environment_credentials` — Flag indicating whether to read AWS credentials from the environment variables `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` if they exist. Default value: `false`.
- `use_environment_credentials` — Flag indicating whether to read AWS credentials from the network environment, as well as from the environment variables `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` if they exist. Default value: `false`.
- `use_insecure_imds_request` — Flag indicating whether to use a less secure connection when requesting credentials from the Amazon EC2 instance metadata service (IMDS). Default value: `false`.
- `proxy` — Proxy configuration for the S3 endpoint. Each `uri` element inside the `proxy` block must contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value: 10 seconds.
- `request_timeout_ms` — Request timeout in milliseconds. Default value: 5 seconds.

View File

@ -158,7 +158,9 @@ FROM amazon_train
<catboost_dynamic_library_path>/home/catboost/data/libcatboostmodel.so</catboost_dynamic_library_path>
<models_config>/home/catboost/models/*_model.xml</models_config>
```
!!! note "Примечание"
Вы можете позднее изменить путь к конфигурации модели CatBoost без перезагрузки сервера.
## 4. Запустите вывод модели из SQL {#run-model-inference}
Для тестирования модели запустите клиент ClickHouse `$ clickhouse client`.

View File

@ -166,4 +166,19 @@ toc_title: "Visual interfaces from third-party developers"
[How to configure ClickHouse in Looker.](https://docs.looker.com/setup-and-management/database-config/clickhouse)
[Original article](https://clickhouse.tech/docs/ru/interfaces/third-party/gui/) <!--hide-->
### SeekTable {#seektable}
[SeekTable](https://www.seektable.com) is a self-service BI tool for data exploration and operational reporting. It is available both as a cloud service and a self-hosted version. SeekTable reports may be embedded into any web app.
Key features:
- Convenient report builder.
- Flexible configuration of SQL reports and query creation for specific reports.
- Integrates with ClickHouse via either the native TCP/IP endpoint or the HTTP(S) interface (two different drivers).
- Supports the full power of the ClickHouse SQL dialect for building queries over various dimensions and measures.
- [Web API](https://www.seektable.com/help/web-api-integration) for automated report generation.
- The report development process supports [data backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data model (cube) / report configuration is a human-readable XML file that can be stored in a version control system.
SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual use.
[How to configure a ClickHouse connection in SeekTable.](https://www.seektable.com/help/clickhouse-pivot-table)

View File

@ -4,7 +4,9 @@
You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for many tables at once.
The `system.columns` table contains the following columns (the column type is shown in brackets):
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are present in `system.columns` only in those sessions where they were created. The `database` field of such columns is empty.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — table name.
@ -23,3 +25,46 @@
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag indicating whether the column is included in the sampling key.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — compression codec name.
**Example**
```sql
SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: system
table: aggregate_function_combinators
name: name
type: String
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
Row 2:
──────
database: system
table: aggregate_function_combinators
name: is_internal
type: UInt8
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
```

View File

@ -1,39 +1,94 @@
# system.tables {#system-tables}
Contains metadata of each table that the server knows about. Detached tables are not shown in `system.tables`.
Contains metadata of each table that the server knows about.
This table contains the following columns (the column type is shown in brackets):
Detached tables ([DETACH](../../sql-reference/statements/detach.md)) are not shown in `system.tables`.
- `database String` — the name of the database the table is in.
- `name` (String) — table name.
- `engine` (String) — table engine name (without parameters).
- `is_temporary` (UInt8) — flag indicating whether the table is temporary.
- `data_path` (String) — path to the table data in the file system.
- `metadata_path` (String) — path to the table metadata in the file system.
- `metadata_modification_time` (DateTime) — time of the latest modification of the table metadata.
- `dependencies_database` (Array(String)) — database dependencies.
- `dependencies_table` (Array(String)) — table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `create_table_query` (String) — the query that was used to create the table.
- `engine_full` (String) — parameters of the table engine.
- `partition_key` (String) — the partition key expression specified in the table.
- `sorting_key` (String) — the sorting key expression specified in the table.
- `primary_key` (String) — the primary key expression specified in the table.
- `sampling_key` (String) — the sampling key expression specified in the table.
- `storage_policy` (String) — the storage policy:
Information about [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) is present in `system.tables` only in those sessions where they were created. The `database` field of such tables is empty, and the `is_temporary` flag is switched on.
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — the name of the database the table is in.
- `name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `engine` ([String](../../sql-reference/data-types/string.md)) — table engine name (without parameters).
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag indicating whether the table is temporary.
- `data_path` ([String](../../sql-reference/data-types/string.md)) — path to the table data in the file system.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — path to the table metadata in the file system.
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time of the latest modification of the table metadata.
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — database dependencies.
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) — the query that was used to create the table.
- `engine_full` ([String](../../sql-reference/data-types/string.md)) — parameters of the table engine.
- `partition_key` ([String](../../sql-reference/data-types/string.md)) — the partition key expression specified in the table.
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) — the sorting key expression specified in the table.
- `primary_key` ([String](../../sql-reference/data-types/string.md)) — the primary key expression specified in the table.
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) — the sampling key expression specified in the table.
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) — the storage policy:
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- `total_rows` (Nullable(UInt64)) — total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `Null` (including the underlying `Buffer` table).
- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — total number of rows, if it is possible to quickly determine the exact number of rows in the table, otherwise `NULL` (including the underlying `Buffer` table).
- `total_bytes` (Nullable(UInt64)) — total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `Null` (**does not** include any underlying storage).
- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — total number of bytes, if it is possible to quickly determine the exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage).
- If the table stores data on disk, returns the used disk space (i.e., compressed).
- If the table stores data in memory, returns the approximate number of bytes used in memory.
- `lifetime_rows` (Nullable(UInt64)) — total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — total number of rows INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` (Nullable(UInt64)) — total number of bytes INSERTed since server start (only for `Buffer` tables).
- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — total number of bytes INSERTed since server start (only for `Buffer` tables).
The `system.tables` table is used in the `SHOW TABLES` query implementation.
**Example**
```sql
SELECT * FROM system.tables LIMIT 2 FORMAT Vertical;
```
```text
Row 1:
──────
database: system
name: aggregate_function_combinators
uuid: 00000000-0000-0000-0000-000000000000
engine: SystemAggregateFunctionCombinators
is_temporary: 0
data_paths: []
metadata_path: /var/lib/clickhouse/metadata/system/aggregate_function_combinators.sql
metadata_modification_time: 1970-01-01 03:00:00
dependencies_database: []
dependencies_table: []
create_table_query:
engine_full:
partition_key:
sorting_key:
primary_key:
sampling_key:
storage_policy:
total_rows: ᴺᵁᴸᴸ
total_bytes: ᴺᵁᴸᴸ
Row 2:
──────
database: system
name: asynchronous_metrics
uuid: 00000000-0000-0000-0000-000000000000
engine: SystemAsynchronousMetrics
is_temporary: 0
data_paths: []
metadata_path: /var/lib/clickhouse/metadata/system/asynchronous_metrics.sql
metadata_modification_time: 1970-01-01 03:00:00
dependencies_database: []
dependencies_table: []
create_table_query:
engine_full:
partition_key:
sorting_key:
primary_key:
sampling_key:
storage_policy:
total_rows: ᴺᵁᴸᴸ
total_bytes: ᴺᵁᴸᴸ
```

View File

@ -16,51 +16,65 @@ toc_title: JSON
## visitParamHas(params, name) {#visitparamhasparams-name}
Check for the presence of a field with the name name.
Checks for the presence of a field with the name `name`.
Alias: `simpleJSONHas`.
## visitParamExtractUInt(params, name) {#visitparamextractuintparams-name}
Parse UInt64 from the value of the field named name. If the field is a string, try to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, return 0.
Tries to extract a UInt64 number from the value of the field named `name`. If the field is a string, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
Alias: `simpleJSONExtractUInt`.
## visitParamExtractInt(params, name) {#visitparamextractintparams-name}
The same for Int64.
Alias: `simpleJSONExtractInt`.
## visitParamExtractFloat(params, name) {#visitparamextractfloatparams-name}
The same for Float64.
Alias: `simpleJSONExtractFloat`.
## visitParamExtractBool(params, name) {#visitparamextractboolparams-name}
Parse a true/false value. The result is UInt8.
Tries to extract a true/false value. The result is UInt8.
Alias: `simpleJSONExtractBool`.
## visitParamExtractRaw(params, name) {#visitparamextractrawparams-name}
Return the value of a field, including separators.
Returns the value of a field, including separators.
Alias: `simpleJSONExtractRaw`.
Examples:
``` sql
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
```
## visitParamExtractString(params, name) {#visitparamextractstringparams-name}
Parse the string in double quotes. The value is unescaped. If unescaping fails, an empty string is returned.
Parses the string in double quotes. The value is unescaped. If unescaping fails, an empty string is returned.
Alias: `simpleJSONExtractString`.
Examples:
``` sql
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'
visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''
visitParamExtractString('{"abc":"hello}', 'abc') = ''
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
visitParamExtractString('{"abc":"hello}', 'abc') = '';
```
At the moment, code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane are not supported (they are converted to CESU-8 instead of UTF-8).
Currently, code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane are not supported (they are converted to CESU-8 instead of UTF-8).
The following functions use [simdjson](https://github.com/lemire/simdjson) which is designed for more complex JSON parsing requirements. Assumption 2 mentioned above still applies.
The following functions use [simdjson](https://github.com/lemire/simdjson), which is designed for more complex JSON parsing requirements. Assumption 2 mentioned above still applies.
## isValidJSON(json) {#isvalidjsonjson}
@ -292,4 +306,3 @@ SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b":{"c": {"d": "hello"
│ [('d','"hello"'),('f','"world"')] │
└───────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -63,6 +63,9 @@ DROP COLUMN [IF EXISTS] name
The query deletes data from the file system. Since this deletes entire files, the query completes almost instantly.
!!! warning "Warning"
You can't delete a column that is used in a [materialized view](../../../sql-reference/statements/create/view.md#materialized). Otherwise, an error is returned.
Example:
``` sql
@ -155,7 +158,7 @@ ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
**See Also**
- [REMOVE TTL](ttl.md).

View File

@ -46,15 +46,32 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name AS table_function()
### From SELECT query {#from-select-query}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
CREATE TABLE [IF NOT EXISTS] [db.]table_name[(name1 [type1], name2 [type2], ...)] ENGINE = engine AS SELECT ...
```
Creates a table with a structure like the result of the `SELECT` query, with the engine engine, and fills it with data from the SELECT.
Creates a table with a structure like the result of the `SELECT` query, with the `engine` engine, and fills it with data from the `SELECT`. You can also explicitly specify a description of the columns.
In all cases, if `IF NOT EXISTS` is specified, the query won't return an error if the table already exists. In this case, the query won't do anything.
If the table already exists and `IF NOT EXISTS` is specified, the query does nothing.
Other clauses may follow the `ENGINE` clause in the query, depending on the engine. See the detailed documentation on creating tables in the descriptions of [table engines](../../../engines/table-engines/index.md#table_engines).
**Example**
Query:
``` sql
CREATE TABLE t1 (x String) ENGINE = Memory AS SELECT 1;
SELECT x, toTypeName(x) FROM t1;
```
Result:
```text
┌─x─┬─toTypeName(x)─┐
│ 1 │ String │
└───┴───────────────┘
```
## NULL or NOT NULL Modifiers {#null-modifiers}
The `NULL` or `NOT NULL` modifier specified after a data type in a column definition allows or disallows the data type to be [Nullable](../../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -230,7 +247,7 @@ CREATE TABLE codec_example
)
ENGINE = MergeTree()
```
## Temporary Tables {#vremennye-tablitsy}
## Temporary Tables {#temporary-tables}
ClickHouse supports temporary tables with the following characteristics:

View File

@ -5,19 +5,83 @@ toc_title: OPTIMIZE
# OPTIMIZE {#misc_operations-optimize}
``` sql
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE]
```
The query tries to initialize an unscheduled merge of data parts for tables of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family. Other table engines are not supported.
If `OPTIMIZE` is applied to tables of the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family, ClickHouse creates a merge task and waits for it to be executed on all nodes (if the `replication_alter_partitions_sync` setting is enabled).
- If `OPTIMIZE` does not perform a merge for any reason, ClickHouse does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
- If `PARTITION` is specified, only the specified partition is optimized. [How to set the partition expression](alter/index.md#alter-how-to-specify-part-expr).
- If `FINAL` is specified, optimization is performed even when all the data is already in one part. Moreover, the merge is forced even if concurrent merges are being performed.
- If `DEDUPLICATE` is specified, completely identical rows are collapsed (values in all columns are compared); this makes sense only for the MergeTree engine.
The query tries to initialize an unscheduled merge of data parts for tables.
!!! warning "Warning"
The `OPTIMIZE` query can't fix the cause of the «Too many parts» error.
`OPTIMIZE` can't fix the cause of the `Too many parts` error.
**Syntax**
``` sql
OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
```
It can be applied to tables of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) family, as well as to the [MaterializedView](../../engines/table-engines/special/materializedview.md) and [Buffer](../../engines/table-engines/special/buffer.md) engines. Other table engines are not supported.
If the `OPTIMIZE` query is applied to tables of the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family, ClickHouse creates a merge task and waits for it to be executed on all nodes (if the `replication_alter_partitions_sync` setting is enabled).
- By default, if the `OPTIMIZE` query fails to perform a merge, ClickHouse does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
- If `PARTITION` is specified, only the specified partition is optimized. [How to set the partition expression](alter/index.md#alter-how-to-specify-part-expr).
- If `FINAL` is specified, optimization is performed even when all the data is already in one data part. Moreover, the merge is forced even if concurrent merges are being performed.
- If `DEDUPLICATE` is specified, completely identical rows are collapsed (values in all columns are compared); this makes sense only for the MergeTree engine.
## BY expression {#by-expression}
To deduplicate by an arbitrary set of columns, you can explicitly specify a list of columns or use any combination of the [`*`](../../sql-reference/statements/select/index.md#asterisk) wildcard and the [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) and [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions.
The list of columns for deduplication must include all columns specified in the sorting conditions (the primary key and the sorting key), as well as in the partitioning conditions (the partitioning key).
!!! note "Note"
Note that the `*` wildcard is handled just as in `SELECT` queries: `MATERIALIZED` and `ALIAS` columns are not included in the expansion.
It is an error to specify an empty list of columns, an expression that results in an empty list of columns, or to deduplicate by an `ALIAS` column.
**Examples**
Consider the table:
``` sql
CREATE TABLE example (
primary_key Int32,
secondary_key Int32,
value UInt32,
partition_key UInt32,
materialized_value UInt32 MATERIALIZED 12345,
aliased_value UInt32 ALIAS 2,
PRIMARY KEY primary_key
) ENGINE=MergeTree
PARTITION BY partition_key;
```
The 'old' way of deduplication, when all columns are taken into account. A row is removed only if all values in all columns are equal to the corresponding values in the previous row.
``` sql
OPTIMIZE TABLE example FINAL DEDUPLICATE;
```
Deduplication by all columns except `ALIAS` and `MATERIALIZED`: `primary_key`, `secondary_key`, `value`, `partition_key`, and `materialized_value`.
``` sql
OPTIMIZE TABLE example FINAL DEDUPLICATE BY *;
```
Deduplication by all columns except `ALIAS`, `MATERIALIZED`, and `materialized_value`: the `primary_key`, `secondary_key`, `value`, and `partition_key` columns.
``` sql
OPTIMIZE TABLE example FINAL DEDUPLICATE BY * EXCEPT materialized_value;
```
Deduplication by the `primary_key`, `secondary_key`, and `partition_key` columns.
``` sql
OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key;
```
Deduplication by any column matching a regex: the `primary_key`, `secondary_key`, and `partition_key` columns.
``` sql
OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key');
```

View File

@ -19,6 +19,7 @@ set (CLICKHOUSE_SERVER_LINK
clickhouse_storages_system
clickhouse_table_functions
string_utils
jemalloc
${LINK_RESOURCE_LIB}

View File

@ -101,6 +101,10 @@
# include <Server/KeeperTCPHandlerFactory.h>
#endif
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
namespace CurrentMetrics
{
extern const Metric Revision;
@ -109,11 +113,35 @@ namespace CurrentMetrics
extern const Metric MaxDDLEntryID;
}
#if USE_JEMALLOC
static bool jemallocOptionEnabled(const char *name)
{
bool value;
size_t size = sizeof(value);
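/// mallctl() copies the named jemalloc option into `value`; a non-zero return code means the lookup failed.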
if (mallctl(name, reinterpret_cast<void *>(&value), &size, /* newp= */ nullptr, /* newlen= */ 0))
throw Poco::SystemException("mallctl() failed");
return value;
}
#else
static bool jemallocOptionEnabled(const char *) { return false; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv)
{
DB::Server app;
if (jemallocOptionEnabled("opt.background_thread"))
{
LOG_ERROR(&app.logger(),
"jemalloc.background_thread was requested, "
"however ClickHouse uses percpu_arena and background_thread most likely will not give any benefits, "
"and also background_thread is not compatible with ClickHouse watchdog "
"(that can be disabled with CLICKHOUSE_WATCHDOG_ENABLE=0)");
}
/// Do not fork separate process from watchdog if we attached to terminal.
/// Otherwise it breaks gdb usage.
/// Can be overridden by environment variable (cannot use server config at this moment).

View File

@ -96,7 +96,7 @@ public:
UInt32 num_scale_ = 0, UInt32 denom_scale_ = 0)
: Base(argument_types_, {}), num_scale(num_scale_), denom_scale(denom_scale_) {}
DataTypePtr getReturnType() const final { return std::make_shared<DataTypeNumber<Float64>>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeNumber<Float64>>(); }
bool allocatesMemoryInArena() const override { return false; }

View File

@ -0,0 +1,49 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionSumCount.h>
#include <AggregateFunctions/Helpers.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include "registerAggregateFunctions.h"
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
bool allowType(const DataTypePtr& type) noexcept
{
const WhichDataType t(type);
return t.isInt() || t.isUInt() || t.isFloat() || t.isDecimal();
}
AggregateFunctionPtr createAggregateFunctionSumCount(const std::string & name, const DataTypes & argument_types, const Array & parameters)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
AggregateFunctionPtr res;
DataTypePtr data_type = argument_types[0];
if (!allowType(data_type))
throw Exception("Illegal type " + data_type->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (isDecimal(data_type))
res.reset(createWithDecimalType<AggregateFunctionSumCount>(
*data_type, argument_types, getDecimalScale(*data_type)));
else
res.reset(createWithNumericType<AggregateFunctionSumCount>(*data_type, argument_types));
return res;
}
}
void registerAggregateFunctionSumCount(AggregateFunctionFactory & factory)
{
factory.registerFunction("sumCount", createAggregateFunctionSumCount);
}
}
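For context, the newly registered `sumCount` computes the sum and the row count of its argument in a single pass and returns them as a tuple; the return-type logic lives in `AggregateFunctionSumCount.h` below. A usage sketch, with a hypothetical table `t` and column `x`:
``` sql
-- Returns a tuple (sum, count), e.g. (15, 3) for the values 4, 5, 6.
SELECT sumCount(x) FROM t;

-- avg(x) can be recovered from the two tuple elements:
SELECT s.1 / s.2 FROM (SELECT sumCount(x) AS s FROM t);
```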

View File

@ -0,0 +1,55 @@
#pragma once
#include <type_traits>
#include <DataTypes/DataTypeTuple.h>
#include <AggregateFunctions/AggregateFunctionAvg.h>
namespace DB
{
template <typename T>
using DecimalOrNumberDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<AvgFieldType<T>>, DataTypeNumber<AvgFieldType<T>>>;
template <typename T>
class AggregateFunctionSumCount final : public AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>
{
public:
using Base = AggregateFunctionAvgBase<AvgFieldType<T>, UInt64, AggregateFunctionSumCount<T>>;
AggregateFunctionSumCount(const DataTypes & argument_types_, UInt32 num_scale_ = 0)
: Base(argument_types_, num_scale_), scale(num_scale_) {}
DataTypePtr getReturnType() const override
{
DataTypes types;
if constexpr (IsDecimalNumber<T>)
types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>(DecimalOrNumberDataType<T>::maxPrecision(), scale));
else
types.emplace_back(std::make_shared<DecimalOrNumberDataType<T>>());
types.emplace_back(std::make_shared<DataTypeUInt64>());
return std::make_shared<DataTypeTuple>(types);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const final
{
assert_cast<DecimalOrVectorCol<AvgFieldType<T>> &>((assert_cast<ColumnTuple &>(to)).getColumn(0)).getData().push_back(
this->data(place).numerator);
assert_cast<ColumnUInt64 &>((assert_cast<ColumnTuple &>(to)).getColumn(1)).getData().push_back(
this->data(place).denominator);
}
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final
{
this->data(place).numerator += static_cast<const DecimalOrVectorCol<T> &>(*columns[0]).getData()[row_num];
++this->data(place).denominator;
}
String getName() const final { return "sumCount"; }
private:
UInt32 scale;
};
}

View File

@ -132,12 +132,6 @@ void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
factory.registerFunction("uniqExact",
{createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, properties});
#if USE_DATASKETCHES
factory.registerFunction("uniqThetaSketch",
{createAggregateFunctionUniq<AggregateFunctionUniqThetaSketchData, AggregateFunctionUniqThetaSketchData>, properties});
#endif
}
}

View File

@ -22,7 +22,6 @@
#include <AggregateFunctions/UniquesHashSet.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/ThetaSketchData.h>
#include <AggregateFunctions/UniqVariadicHash.h>
@ -125,19 +124,6 @@ struct AggregateFunctionUniqExactData<String>
};
/// uniqThetaSketch
#if USE_DATASKETCHES
struct AggregateFunctionUniqThetaSketchData
{
using Set = ThetaSketchData<UInt64>;
Set set;
static String getName() { return "uniqThetaSketch"; }
};
#endif
namespace detail
{
@ -203,12 +189,6 @@ struct OneAdder
data.set.insert(key);
}
}
#if USE_DATASKETCHES
else if constexpr (std::is_same_v<Data, AggregateFunctionUniqThetaSketchData>)
{
data.set.insertOriginal(column.getDataAt(row_num));
}
#endif
}
};

View File

@ -17,7 +17,7 @@
#include <IO/WriteHelpers.h>
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
@ -280,7 +280,7 @@ public:
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -163,7 +163,7 @@ public:
sorted = false;
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
#endif
@ -191,7 +191,7 @@ public:
}
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -1,119 +0,0 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_DATASKETCHES
#include <boost/noncopyable.hpp>
#include <memory>
#include <theta_sketch.hpp> // Y_IGNORE
#include <theta_union.hpp> // Y_IGNORE
namespace DB
{
template <typename Key>
class ThetaSketchData : private boost::noncopyable
{
private:
std::unique_ptr<datasketches::update_theta_sketch> sk_update;
std::unique_ptr<datasketches::theta_union> sk_union;
inline datasketches::update_theta_sketch * getSkUpdate()
{
if (!sk_update)
sk_update = std::make_unique<datasketches::update_theta_sketch>(datasketches::update_theta_sketch::builder().build());
return sk_update.get();
}
inline datasketches::theta_union * getSkUnion()
{
if (!sk_union)
sk_union = std::make_unique<datasketches::theta_union>(datasketches::theta_union::builder().build());
return sk_union.get();
}
public:
using value_type = Key;
ThetaSketchData() = default;
~ThetaSketchData() = default;
/// Insert original value without hash, as `datasketches::update_theta_sketch.update` will do the hash internal.
void insertOriginal(const StringRef & value)
{
getSkUpdate()->update(value.data, value.size);
}
/// Note that `datasketches::update_theta_sketch.update` will do the hash again.
void insert(Key value)
{
getSkUpdate()->update(value);
}
UInt64 size() const
{
if (sk_union)
return static_cast<UInt64>(sk_union->get_result().get_estimate());
else if (sk_update)
return static_cast<UInt64>(sk_update->get_estimate());
else
return 0;
}
void merge(const ThetaSketchData & rhs)
{
datasketches::theta_union * u = getSkUnion();
if (sk_update)
{
u->update(*sk_update);
sk_update.reset(nullptr);
}
if (rhs.sk_update)
u->update(*rhs.sk_update);
else if (rhs.sk_union)
u->update(rhs.sk_union->get_result());
}
/// You can only call for an empty object.
void read(DB::ReadBuffer & in)
{
datasketches::compact_theta_sketch::vector_bytes bytes;
readVectorBinary(bytes, in);
if (!bytes.empty())
{
auto sk = datasketches::compact_theta_sketch::deserialize(bytes.data(), bytes.size());
getSkUnion()->update(sk);
}
}
void write(DB::WriteBuffer & out) const
{
if (sk_update)
{
auto bytes = sk_update->compact().serialize();
writeVectorBinary(bytes, out);
}
else if (sk_union)
{
auto bytes = sk_union->get_result().serialize();
writeVectorBinary(bytes, out);
}
else
{
datasketches::compact_theta_sketch::vector_bytes bytes;
writeVectorBinary(bytes, out);
}
}
};
}
#endif

View File

@ -25,6 +25,7 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsStable(AggregateFunctionFactory &);
void registerAggregateFunctionsStatisticsSimple(AggregateFunctionFactory &);
void registerAggregateFunctionSum(AggregateFunctionFactory &);
void registerAggregateFunctionSumCount(AggregateFunctionFactory &);
void registerAggregateFunctionSumMap(AggregateFunctionFactory &);
void registerAggregateFunctionsUniq(AggregateFunctionFactory &);
void registerAggregateFunctionUniqCombined(AggregateFunctionFactory &);
@ -83,6 +84,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsStatisticsStable(factory);
registerAggregateFunctionsStatisticsSimple(factory);
registerAggregateFunctionSum(factory);
registerAggregateFunctionSumCount(factory);
registerAggregateFunctionSumMap(factory);
registerAggregateFunctionsUniq(factory);
registerAggregateFunctionUniqCombined(factory);

View File

@ -50,6 +50,7 @@ SRCS(
AggregateFunctionStatisticsSimple.cpp
AggregateFunctionStudentTTest.cpp
AggregateFunctionSum.cpp
AggregateFunctionSumCount.cpp
AggregateFunctionSumMap.cpp
AggregateFunctionTopK.cpp
AggregateFunctionUniq.cpp

View File

@ -111,7 +111,7 @@ public:
}
/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -128,7 +128,7 @@ public:
offsets.push_back(new_size);
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -277,7 +277,7 @@ private:
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
* In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
*/
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
#endif
@ -359,6 +359,6 @@ extern template class Allocator<true, false>;
extern template class Allocator<false, true>;
extern template class Allocator<true, true>;
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -19,7 +19,7 @@ namespace DB
struct UInt128
{
/// Suppress gcc7 warnings: 'prev_key.DB::UInt128::low' may be used uninitialized in this function
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -92,7 +92,7 @@ struct UInt128
return static_cast<T>(low);
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
@ -150,7 +150,7 @@ struct DummyUInt256
{
/// Suppress gcc7 warnings: 'prev_key.DB::UInt256::a' may be used uninitialized in this function
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -179,7 +179,7 @@ struct DummyUInt256
bool operator== (const UInt64 rhs) const { return a == rhs && b == 0 && c == 0 && d == 0; }
bool operator!= (const UInt64 rhs) const { return !operator==(rhs); }
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -15,4 +15,3 @@
#cmakedefine01 USE_GRPC
#cmakedefine01 USE_STATS
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_DATASKETCHES

View File

@ -1,5 +1,5 @@
/// Bug in GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
@ -263,6 +263,6 @@ int main()
return 0;
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -69,7 +69,7 @@ static void aggregate1(Map & map, Source::const_iterator begin, Source::const_it
++map[*it];
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -122,7 +122,7 @@ static void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source:
}
}
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -62,7 +62,7 @@ struct AggregateIndependent
}
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -115,7 +115,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
}
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
@ -265,7 +265,7 @@ struct Creator
void operator()(Value &) const {}
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -275,7 +275,7 @@ struct Updater
void operator()(Value & x) const { ++x; }
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -96,7 +96,7 @@ template <typename T> bool decimalEqual(T x, T y, UInt32 x_scale, UInt32 y_scale
template <typename T> bool decimalLess(T x, T y, UInt32 x_scale, UInt32 y_scale);
template <typename T> bool decimalLessOrEqual(T x, T y, UInt32 x_scale, UInt32 y_scale);
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -159,7 +159,7 @@ private:
T dec;
UInt32 scale;
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
@ -563,7 +563,7 @@ public:
{
case Types::Null: return f(field.template get<Null>());
// gcc 8.2.1
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -583,7 +583,7 @@ public:
case Types::Int128: return f(field.template get<Int128>());
case Types::UInt256: return f(field.template get<UInt256>());
case Types::Int256: return f(field.template get<Int256>());
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
}

View File

@ -424,6 +424,7 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, optimize_fuse_sum_count_avg, false, "Fuse aggregate functions sum(), avg(), count() with identical arguments into one sumCount() call, if the query has at least two different functions", 0) \
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flatten to separate array columns instead of one array of tuples", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
@ -445,6 +446,8 @@ class IColumn;
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializeMySQL. If equal to 0, this setting is disabled", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
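A sketch of how the settings added in this hunk would be exercised, assuming both behave as ordinary per-session settings (names taken from the `M(...)` entries above; table `t` and column `x` are hypothetical):
``` sql
-- Let the optimizer fuse sum(x), count(x), avg(x) over the same argument
-- into a single sumCount(x) call (illustrative).
SET optimize_fuse_sum_count_avg = 1;
SELECT sum(x), count(x), avg(x) FROM t;

-- Cap rows/bytes read per block from external MySQL sources (0 = disabled).
SET external_storage_max_read_rows = 65536;
SET external_storage_max_read_bytes = 10485760;
```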

View File

@ -15,7 +15,7 @@ namespace DB
struct Null {};
/// Ignore strange gcc warning https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55776
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
@ -59,7 +59,7 @@ enum class TypeIndex
LowCardinality,
Map,
};
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif

View File

@ -198,7 +198,7 @@ ASTPtr DatabaseConnectionMySQL::getCreateDatabaseQuery() const
void DatabaseConnectionMySQL::fetchTablesIntoLocalCache(ContextPtr local_context) const
{
const auto & tables_with_modification_time = fetchTablesWithModificationTime();
const auto & tables_with_modification_time = fetchTablesWithModificationTime(local_context);
destroyLocalCacheExtraTables(tables_with_modification_time);
fetchLatestTablesStructureIntoCache(tables_with_modification_time, local_context);
@ -252,7 +252,7 @@ void DatabaseConnectionMySQL::fetchLatestTablesStructureIntoCache(
}
}
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime() const
std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTime(ContextPtr local_context) const
{
Block tables_status_sample_block
{
@ -268,7 +268,8 @@ std::map<String, UInt64> DatabaseConnectionMySQL::fetchTablesWithModificationTim
" WHERE TABLE_SCHEMA = " << quote << database_name_in_mysql;
std::map<String, UInt64> tables_with_modification_time;
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, DEFAULT_BLOCK_SIZE);
StreamSettings mysql_input_stream_settings(local_context->getSettingsRef());
MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_status_sample_block, mysql_input_stream_settings);
while (Block block = result.read())
{
@ -292,7 +293,7 @@ DatabaseConnectionMySQL::fetchTablesColumnsList(const std::vector<String> & tabl
mysql_pool,
database_name_in_mysql,
tables_name,
settings.external_table_functions_use_nulls,
settings,
database_settings->mysql_datatypes_support_level);
}

View File

@ -108,7 +108,7 @@ private:
void fetchTablesIntoLocalCache(ContextPtr context) const;
std::map<String, UInt64> fetchTablesWithModificationTime() const;
std::map<String, UInt64> fetchTablesWithModificationTime(ContextPtr local_context) const;
std::map<String, NamesAndTypesList> fetchTablesColumnsList(const std::vector<String> & tables_name, ContextPtr context) const;

View File

@ -44,7 +44,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
mysqlxx::PoolWithFailover & pool,
const String & database_name,
const std::vector<String> & tables_name,
bool external_table_functions_use_nulls,
const Settings & settings,
MultiEnum<MySQLDataTypesSupport> type_support)
{
std::map<String, NamesAndTypesList> tables_and_columns;
@ -78,7 +78,8 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
" WHERE TABLE_SCHEMA = " << quote << database_name
<< " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION";
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE);
StreamSettings mysql_input_stream_settings(settings);
MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, mysql_input_stream_settings);
while (Block block = result.read())
{
const auto & table_name_col = *block.getByPosition(0).column;
@ -99,7 +100,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
convertMySQLDataType(
type_support,
column_type_col[i].safeGet<String>(),
external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
settings.external_table_functions_use_nulls && is_nullable_col[i].safeGet<UInt64>(),
is_unsigned_col[i].safeGet<UInt64>(),
char_max_length_col[i].safeGet<UInt64>(),
precision_col[i].safeGet<UInt64>(),

View File

@ -12,6 +12,7 @@
#include <map>
#include <vector>
#include <Core/Settings.h>
namespace DB
{
@ -20,7 +21,7 @@ std::map<String, NamesAndTypesList> fetchTablesColumnsList(
mysqlxx::PoolWithFailover & pool,
const String & database_name,
const std::vector<String> & tables_name,
bool external_table_functions_use_nulls,
const Settings & settings,
MultiEnum<MySQLDataTypesSupport> type_support);
}

View File

@ -24,7 +24,8 @@ namespace ErrorCodes
}
static std::unordered_map<String, String> fetchTablesCreateQuery(
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name, const std::vector<String> & fetch_tables)
const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name,
const std::vector<String> & fetch_tables, const Settings & global_settings)
{
std::unordered_map<String, String> tables_create_query;
for (const auto & fetch_table_name : fetch_tables)
@ -34,9 +35,10 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
{std::make_shared<DataTypeString>(), "Create Table"},
};
StreamSettings mysql_input_stream_settings(global_settings, false, true);
MySQLBlockInputStream show_create_table(
connection, "SHOW CREATE TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(fetch_table_name),
show_create_table_header, DEFAULT_BLOCK_SIZE, false, true);
show_create_table_header, mysql_input_stream_settings);
Block create_query_block = show_create_table.read();
if (!create_query_block || create_query_block.rows() != 1)
@ -49,13 +51,14 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
}
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database)
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database, const Settings & global_settings)
{
Block header{{std::make_shared<DataTypeString>(), "table_name"}};
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database);
std::vector<String> tables_in_db;
MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE);
StreamSettings mysql_input_stream_settings(global_settings);
MySQLBlockInputStream input(connection, query, header, mysql_input_stream_settings);
while (Block block = input.read())
{
@ -77,7 +80,8 @@ void MaterializeMetadata::fetchMasterStatus(mysqlxx::PoolWithFailover::Entry & c
{std::make_shared<DataTypeString>(), "Executed_Gtid_Set"},
};
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, DEFAULT_BLOCK_SIZE, false, true);
StreamSettings mysql_input_stream_settings(settings, false, true);
MySQLBlockInputStream input(connection, "SHOW MASTER STATUS;", header, mysql_input_stream_settings);
Block master_status = input.read();
if (!master_status || master_status.rows() != 1)
@ -99,7 +103,8 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
};
const String & fetch_query = "SHOW VARIABLES WHERE Variable_name = 'binlog_checksum'";
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
StreamSettings mysql_input_stream_settings(settings, false, true);
MySQLBlockInputStream variables_input(connection, fetch_query, variables_header, mysql_input_stream_settings);
while (Block variables_block = variables_input.read())
{
@ -114,7 +119,7 @@ void MaterializeMetadata::fetchMasterVariablesValue(const mysqlxx::PoolWithFailo
}
}
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, WriteBuffer & out)
static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings, WriteBuffer & out)
{
Block sync_user_privs_header
{
@ -122,7 +127,8 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
};
String grants_query, sub_privs;
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, DEFAULT_BLOCK_SIZE);
StreamSettings mysql_input_stream_settings(global_settings);
MySQLBlockInputStream input(connection, "SHOW GRANTS FOR CURRENT_USER();", sync_user_privs_header, mysql_input_stream_settings);
while (Block block = input.read())
{
for (size_t index = 0; index < block.rows(); ++index)
@ -146,11 +152,11 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne
return false;
}
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection)
static void checkSyncUserPriv(const mysqlxx::PoolWithFailover::Entry & connection, const Settings & global_settings)
{
WriteBufferFromOwnString out;
if (!checkSyncUserPrivImpl(connection, out))
if (!checkSyncUserPrivImpl(connection, global_settings, out))
throw Exception("MySQL SYNC USER ACCESS ERR: mysql sync user needs "
"at least GLOBAL PRIVILEGES:'RELOAD, REPLICATION SLAVE, REPLICATION CLIENT' "
"and SELECT PRIVILEGE on MySQL Database."
@ -167,7 +173,8 @@ bool MaterializeMetadata::checkBinlogFileExists(const mysqlxx::PoolWithFailover:
{std::make_shared<DataTypeUInt64>(), "File_size"}
};
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, DEFAULT_BLOCK_SIZE, false, true);
StreamSettings mysql_input_stream_settings(settings, false, true);
MySQLBlockInputStream input(connection, "SHOW MASTER LOGS", logs_header, mysql_input_stream_settings);
while (Block block = input.read())
{
@ -222,7 +229,7 @@ void MaterializeMetadata::transaction(const MySQLReplication::Position & positio
commitMetadata(std::move(fun), persistent_tmp_path, persistent_path);
}
MaterializeMetadata::MaterializeMetadata(const String & path_) : persistent_path(path_)
MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & settings_) : persistent_path(path_), settings(settings_)
{
if (Poco::File(persistent_path).exists())
{
@ -244,7 +251,7 @@ void MaterializeMetadata::startReplication(
mysqlxx::PoolWithFailover::Entry & connection, const String & database,
bool & opened_transaction, std::unordered_map<String, String> & need_dumping_tables)
{
checkSyncUserPriv(connection);
checkSyncUserPriv(connection, settings);
if (checkBinlogFileExists(connection))
return;
@ -263,7 +270,7 @@ void MaterializeMetadata::startReplication(
connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute();
opened_transaction = true;
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database));
need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), settings);
connection->query("UNLOCK TABLES;").execute();
}
catch (...)

View File

@ -10,6 +10,7 @@
#include <Core/MySQL/MySQLReplication.h>
#include <mysqlxx/Connection.h>
#include <mysqlxx/PoolWithFailover.h>
#include <Interpreters/Context.h>
namespace DB
{
@ -25,6 +26,7 @@ namespace DB
struct MaterializeMetadata
{
const String persistent_path;
const Settings settings;
String binlog_file;
UInt64 binlog_position;
@ -50,7 +52,7 @@ struct MaterializeMetadata
bool & opened_transaction,
std::unordered_map<String, String> & need_dumping_tables);
MaterializeMetadata(const String & path_);
MaterializeMetadata(const String & path_, const Settings & settings_);
};
}

View File

@ -90,7 +90,7 @@ MaterializeMySQLSyncThread::~MaterializeMySQLSyncThread()
}
}
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const Settings & settings)
{
Block variables_header{
{std::make_shared<DataTypeString>(), "Variable_name"},
@ -104,19 +104,19 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
"OR (Variable_name = 'default_authentication_plugin' AND upper(Value) = 'MYSQL_NATIVE_PASSWORD') "
"OR (Variable_name = 'log_bin_use_v1_row_events' AND upper(Value) = 'OFF');";
MySQLBlockInputStream variables_input(connection, check_query, variables_header, DEFAULT_BLOCK_SIZE, false, true);
StreamSettings mysql_input_stream_settings(settings, false, true);
MySQLBlockInputStream variables_input(connection, check_query, variables_header, mysql_input_stream_settings);
Block variables_block = variables_input.read();
if (!variables_block || variables_block.rows() != 5)
std::unordered_map<String, String> variables_error_message{
{"log_bin", "log_bin = 'ON'"},
{"binlog_format", "binlog_format='ROW'"},
{"binlog_row_image", "binlog_row_image='FULL'"},
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
};
while (Block variables_block = variables_input.read())
{
std::unordered_map<String, String> variables_error_message{
{"log_bin", "log_bin = 'ON'"},
{"binlog_format", "binlog_format='ROW'"},
{"binlog_row_image", "binlog_row_image='FULL'"},
{"default_authentication_plugin", "default_authentication_plugin='mysql_native_password'"},
{"log_bin_use_v1_row_events", "log_bin_use_v1_row_events='OFF'"}
};
ColumnPtr variable_name_column = variables_block.getByName("Variable_name").column;
for (size_t index = 0; index < variables_block.rows(); ++index)
@ -126,7 +126,10 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection)
if (error_message_it != variables_error_message.end())
variables_error_message.erase(error_message_it);
}
}
if (!variables_error_message.empty())
{
bool first = true;
WriteBufferFromOwnString error_message;
error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires ";
@ -167,7 +170,7 @@ void MaterializeMySQLSyncThread::synchronization()
try
{
MaterializeMetadata metadata(
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata");
DatabaseCatalog::instance().getDatabase(database_name)->getMetadataPath() + "/.metadata", getContext()->getSettingsRef());
bool need_reconnect = true;
Stopwatch watch;
@ -240,7 +243,7 @@ void MaterializeMySQLSyncThread::assertMySQLAvailable()
{
try
{
checkMySQLVariables(pool.get());
checkMySQLVariables(pool.get(), getContext()->getSettingsRef());
}
catch (const mysqlxx::ConnectionFailed & e)
{
@ -326,9 +329,10 @@ static inline void dumpDataForTables(
tryToExecuteQuery(query_prefix + " " + iterator->second, query_context, database_name, comment); /// create table.
auto out = std::make_shared<CountingBlockOutputStream>(getTableOutput(database_name, table_name, query_context));
StreamSettings mysql_input_stream_settings(context->getSettingsRef());
MySQLBlockInputStream input(
connection, "SELECT * FROM " + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name),
out->getHeader(), DEFAULT_BLOCK_SIZE);
out->getHeader(), mysql_input_stream_settings);
Stopwatch watch;
copyData(input, *out, is_cancelled);
@ -375,7 +379,7 @@ bool MaterializeMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metad
opened_transaction = false;
checkMySQLVariables(connection);
checkMySQLVariables(connection, getContext()->getSettingsRef());
std::unordered_map<String, String> need_dumping_tables;
metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables);

View File

@ -4,9 +4,15 @@
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
#include "registerDictionaries.h"
#include <Core/Settings.h>
#include <Interpreters/Context.h>
namespace DB
{
[[maybe_unused]]
static const size_t default_num_tries_on_connection_loss = 3;
namespace ErrorCodes
{
extern const int SUPPORT_IS_DISABLED;
@ -14,20 +20,20 @@ namespace ErrorCodes
void registerDictionarySourceMysql(DictionarySourceFactory & factory)
{
auto create_table_source = [=](const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr /* context */,
auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
[[maybe_unused]] const std::string & config_prefix,
[[maybe_unused]] Block & sample_block,
[[maybe_unused]] ContextPtr context,
const std::string & /* default_database */,
bool /* check_config */) -> DictionarySourcePtr {
#if USE_MYSQL
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block);
StreamSettings mysql_input_stream_settings(context->getSettingsRef()
, config.getBool(config_prefix + ".mysql.close_connection", false) || config.getBool(config_prefix + ".mysql.share_connection", false)
, false
, config.getBool(config_prefix + ".mysql.fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss);
return std::make_unique<MySQLDictionarySource>(dict_struct, config, config_prefix + ".mysql", sample_block, mysql_input_stream_settings);
#else
(void)dict_struct;
(void)config;
(void)config_prefix;
(void)sample_block;
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support.");
#endif
@ -45,22 +51,21 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
# include <IO/WriteHelpers.h>
# include <common/LocalDateTime.h>
# include <common/logger_useful.h>
# include <Formats/MySQLBlockInputStream.h>
# include "readInvalidateQuery.h"
# include <mysqlxx/Exception.h>
# include <mysqlxx/PoolFactory.h>
# include <Core/Settings.h>
namespace DB
{
static const UInt64 max_block_size = 8192;
static const size_t default_num_tries_on_connection_loss = 3;
MySQLDictionarySource::MySQLDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block_)
const Block & sample_block_,
const StreamSettings & settings_)
: log(&Poco::Logger::get("MySQLDictionarySource"))
, update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
@ -74,10 +79,7 @@ MySQLDictionarySource::MySQLDictionarySource(
, query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks}
, load_all_query{query_builder.composeLoadAllQuery()}
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
, close_connection(
config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false))
, max_tries_for_mysql_block_input_stream(
config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss)
, settings(settings_)
{
}
@ -98,8 +100,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other
, last_modification{other.last_modification}
, invalidate_query{other.invalidate_query}
, invalidate_query_response{other.invalidate_query_response}
, close_connection{other.close_connection}
, max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream}
, settings(other.settings)
{
}
@ -122,7 +123,7 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate()
BlockInputStreamPtr MySQLDictionarySource::loadFromQuery(const String & query)
{
return std::make_shared<MySQLWithFailoverBlockInputStream>(
pool, query, sample_block, max_block_size, close_connection, false, max_tries_for_mysql_block_input_stream);
pool, query, sample_block, settings);
}
BlockInputStreamPtr MySQLDictionarySource::loadAll()
@ -245,7 +246,7 @@ LocalDateTime MySQLDictionarySource::getLastModification(mysqlxx::Pool::Entry &
++fetched_rows;
}
if (close_connection && allow_connection_closure)
if (settings.auto_close && allow_connection_closure)
{
connection.disconnect();
}
@ -269,7 +270,7 @@ std::string MySQLDictionarySource::doInvalidateQuery(const std::string & request
Block invalidate_sample_block;
ColumnPtr column(ColumnString::create());
invalidate_sample_block.insert(ColumnWithTypeAndName(column, std::make_shared<DataTypeString>(), "Sample Block"));
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, 1, close_connection);
MySQLBlockInputStream block_input_stream(pool->get(), request, invalidate_sample_block, settings);
return readInvalidateQuery(block_input_stream);
}
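For context, the options consumed above (`close_connection`, `share_connection`, `fail_on_connection_loss`) come from the dictionary source configuration and now flow into a single `StreamSettings` object. A hypothetical DDL sketch, assuming the `SOURCE(MYSQL(...))` clause accepts the same keys as the XML config:
``` sql
-- Illustrative only: close_connection feeds StreamSettings::auto_close;
-- fail_on_connection_loss = 1 drops the retry count from the default 3 to 1.
CREATE DICTIONARY hypothetical_dict (id UInt64, name String)
PRIMARY KEY id
SOURCE(MYSQL(
    host 'mysql-host' port 3306 user 'default' password ''
    db 'db' table 'dict_source'
    close_connection 1
    fail_on_connection_loss 1
))
LAYOUT(FLAT())
LIFETIME(300);
```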

View File

@ -12,7 +12,7 @@
# include "DictionaryStructure.h"
# include "ExternalQueryBuilder.h"
# include "IDictionarySource.h"
# include <Formats/MySQLBlockInputStream.h>
namespace Poco
{
@ -35,7 +35,8 @@ public:
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
const Block & sample_block_);
const Block & sample_block_,
const StreamSettings & settings_);
/// copy-constructor is provided in order to support cloneability
MySQLDictionarySource(const MySQLDictionarySource & other);
@ -87,8 +88,7 @@ private:
LocalDateTime last_modification;
std::string invalidate_query;
mutable std::string invalidate_query_response;
const bool close_connection;
const size_t max_tries_for_mysql_block_input_stream;
const StreamSettings settings;
};
}

View File

@ -148,7 +148,8 @@ void registerDiskS3(DiskFactory & factory)
config.getString(config_prefix + ".secret_access_key", ""),
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
{},
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false))
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", false)),
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false))
);
String metadata_path = config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/");

View File

@ -6,7 +6,7 @@
#include <Poco/Util/XMLConfiguration.h>
#if !__clang__
#if !defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wsuggest-override"
#endif

View File

@ -30,6 +30,15 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
StreamSettings::StreamSettings(const Settings & settings, bool auto_close_, bool fetch_by_name_, size_t max_retry_)
: max_read_mysql_row_nums((settings.external_storage_max_read_rows) ? settings.external_storage_max_read_rows : settings.max_block_size)
, max_read_mysql_bytes_size(settings.external_storage_max_read_bytes)
, auto_close(auto_close_)
, fetch_by_name(fetch_by_name_)
, default_num_tries_on_connection_loss(max_retry_)
{
}
MySQLBlockInputStream::Connection::Connection(
const mysqlxx::PoolWithFailover::Entry & entry_,
const std::string & query_str)
@ -44,29 +53,19 @@ MySQLBlockInputStream::MySQLBlockInputStream(
const mysqlxx::PoolWithFailover::Entry & entry,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_,
const bool auto_close_,
const bool fetch_by_name_)
const StreamSettings & settings_)
: log(&Poco::Logger::get("MySQLBlockInputStream"))
, connection{std::make_unique<Connection>(entry, query_str)}
, max_block_size{max_block_size_}
, auto_close{auto_close_}
, fetch_by_name(fetch_by_name_)
, settings{std::make_unique<StreamSettings>(settings_)}
{
description.init(sample_block);
initPositionMappingFromQueryResultStructure();
}
/// For descendant MySQLWithFailoverBlockInputStream
MySQLBlockInputStream::MySQLBlockInputStream(
const Block & sample_block_,
UInt64 max_block_size_,
bool auto_close_,
bool fetch_by_name_)
MySQLBlockInputStream::MySQLBlockInputStream(const Block &sample_block_, const StreamSettings & settings_)
: log(&Poco::Logger::get("MySQLBlockInputStream"))
, max_block_size(max_block_size_)
, auto_close(auto_close_)
, fetch_by_name(fetch_by_name_)
, settings(std::make_unique<StreamSettings>(settings_))
{
description.init(sample_block_);
}
@ -76,14 +75,10 @@ MySQLWithFailoverBlockInputStream::MySQLWithFailoverBlockInputStream(
mysqlxx::PoolWithFailoverPtr pool_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_,
const bool auto_close_,
const bool fetch_by_name_,
const size_t max_tries_)
: MySQLBlockInputStream(sample_block_, max_block_size_, auto_close_, fetch_by_name_)
, pool(pool_)
, query_str(query_str_)
, max_tries(max_tries_)
const StreamSettings & settings_)
: MySQLBlockInputStream(sample_block_, settings_)
, pool(pool_)
, query_str(query_str_)
{
}
@ -101,12 +96,12 @@ void MySQLWithFailoverBlockInputStream::readPrefix()
}
catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST
{
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, max_tries, ecl.displayText());
LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, settings->default_num_tries_on_connection_loss, ecl.displayText());
}
if (++count_connect_attempts > max_tries)
if (++count_connect_attempts > settings->default_num_tries_on_connection_loss)
{
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, max_tries);
LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss);
throw;
}
}
@ -118,45 +113,57 @@ namespace
{
using ValueType = ExternalResultDescription::ValueType;
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value)
void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value, size_t & read_bytes_size)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(value.getUInt());
read_bytes_size += 1;
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(value.getUInt());
read_bytes_size += 2;
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(value.getUInt());
read_bytes_size += 4;
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(value.getUInt());
read_bytes_size += 8;
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(value.getInt());
read_bytes_size += 1;
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(value.getInt());
read_bytes_size += 2;
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(value.getInt());
read_bytes_size += 4;
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(value.getInt());
read_bytes_size += 8;
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(value.getDouble());
read_bytes_size += 4;
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(value.getDouble());
read_bytes_size += 8;
break;
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
read_bytes_size += assert_cast<ColumnString &>(column).byteSize();
break;
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum()));
read_bytes_size += 2;
break;
case ValueType::vtDateTime:
{
@ -166,10 +173,12 @@ namespace
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
read_bytes_size += 4;
break;
}
case ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
read_bytes_size += assert_cast<ColumnUInt128 &>(column).byteSize();
break;
case ValueType::vtDateTime64:[[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
@ -179,10 +188,12 @@ namespace
{
ReadBuffer buffer(const_cast<char *>(value.data()), value.size(), 0);
data_type.getDefaultSerialization()->deserializeWholeText(column, buffer, FormatSettings{});
read_bytes_size += column.sizeOfValueIfFixed();
break;
}
case ValueType::vtFixedString:
assert_cast<ColumnFixedString &>(column).insertData(value.data(), value.size());
read_bytes_size += column.sizeOfValueIfFixed();
break;
default:
throw Exception("Unsupported value type", ErrorCodes::NOT_IMPLEMENTED);
@ -198,7 +209,7 @@ Block MySQLBlockInputStream::readImpl()
auto row = connection->result.fetch();
if (!row)
{
if (auto_close)
if (settings->auto_close)
connection->entry.disconnect();
return {};
@ -209,6 +220,8 @@ Block MySQLBlockInputStream::readImpl()
columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty();
size_t num_rows = 0;
size_t read_bytes_size = 0;
while (row)
{
for (size_t index = 0; index < position_mapping.size(); ++index)
@ -224,12 +237,12 @@ Block MySQLBlockInputStream::readImpl()
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[index]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value);
insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[index].first, value, read_bytes_size);
column_nullable.getNullMapData().emplace_back(false);
}
else
{
insertValue(*sample.type, *columns[index], description.types[index].first, value);
insertValue(*sample.type, *columns[index], description.types[index].first, value, read_bytes_size);
}
}
else
@ -245,7 +258,7 @@ Block MySQLBlockInputStream::readImpl()
}
++num_rows;
if (num_rows == max_block_size)
if (num_rows == settings->max_read_mysql_row_nums || (settings->max_read_mysql_bytes_size && read_bytes_size >= settings->max_read_mysql_bytes_size))
break;
row = connection->result.fetch();
@ -257,7 +270,7 @@ void MySQLBlockInputStream::initPositionMappingFromQueryResultStructure()
{
position_mapping.resize(description.sample_block.columns());
if (!fetch_by_name)
if (!settings->fetch_by_name)
{
if (description.sample_block.columns() != connection->result.getNumFields())
throw Exception{"mysqlxx::UseQueryResult contains " + toString(connection->result.getNumFields()) + " columns while "

View File

@ -6,11 +6,24 @@
#include <mysqlxx/PoolWithFailover.h>
#include <mysqlxx/Query.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Settings.h>
namespace DB
{
struct StreamSettings
{
/// Check if setting is enabled, otherwise use common `max_block_size` setting.
size_t max_read_mysql_row_nums;
size_t max_read_mysql_bytes_size;
bool auto_close;
bool fetch_by_name;
size_t default_num_tries_on_connection_loss;
StreamSettings(const Settings & settings, bool auto_close_ = false, bool fetch_by_name_ = false, size_t max_retry_ = 5);
};
/// Allows processing results of a MySQL query as a sequence of Blocks, simplifies chaining
class MySQLBlockInputStream : public IBlockInputStream
{
@ -19,16 +32,14 @@ public:
const mysqlxx::PoolWithFailover::Entry & entry,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_,
const bool auto_close_ = false,
const bool fetch_by_name_ = false);
const StreamSettings & settings_);
String getName() const override { return "MySQL"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
protected:
MySQLBlockInputStream(const Block & sample_block_, UInt64 max_block_size_, bool auto_close_, bool fetch_by_name_);
MySQLBlockInputStream(const Block & sample_block_, const StreamSettings & settings);
Block readImpl() override;
void initPositionMappingFromQueryResultStructure();
@ -44,9 +55,7 @@ protected:
Poco::Logger * log;
std::unique_ptr<Connection> connection;
const UInt64 max_block_size;
const bool auto_close;
const bool fetch_by_name;
const std::unique_ptr<StreamSettings> settings;
std::vector<size_t> position_mapping;
ExternalResultDescription description;
};
@ -57,23 +66,18 @@ protected:
class MySQLWithFailoverBlockInputStream final : public MySQLBlockInputStream
{
public:
static constexpr inline auto MAX_TRIES_MYSQL_CONNECT = 5;
MySQLWithFailoverBlockInputStream(
mysqlxx::PoolWithFailoverPtr pool_,
const std::string & query_str_,
const Block & sample_block_,
const UInt64 max_block_size_,
const bool auto_close_ = false,
const bool fetch_by_name_ = false,
const size_t max_tries_ = MAX_TRIES_MYSQL_CONNECT);
const StreamSettings & settings_);
private:
void readPrefix() override;
mysqlxx::PoolWithFailoverPtr pool;
std::string query_str;
size_t max_tries;
};
}

View File

@ -140,7 +140,7 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
#if !__clang__
#if !defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wsuggest-override"
#elif __clang_major__ >= 11
@ -233,7 +233,7 @@ struct ConstSource : public Base
}
};
#if !__clang__ || __clang_major__ >= 11
#if !defined(__clang__) || __clang_major__ >= 11
# pragma GCC diagnostic pop
#endif

View File

@ -12,7 +12,7 @@
/// Warning in boost::geometry during template strategy substitution.
#pragma GCC diagnostic push
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -285,7 +285,7 @@ void PointInPolygonWithGrid<CoordinateType>::calcGridAttributes(
const Point & max_corner = box.max_corner();
#pragma GCC diagnostic push
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
@ -322,7 +322,7 @@ void PointInPolygonWithGrid<CoordinateType>::buildGrid()
for (size_t row = 0; row < grid_size; ++row)
{
#pragma GCC diagnostic push
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
CoordinateType y_min = min_corner.y() + row * cell_height;

View File

@ -6,16 +6,20 @@
# include <IO/WriteBufferFromString.h>
# include <Storages/StorageS3Settings.h>
# include <aws/core/Version.h>
# include <aws/core/auth/AWSCredentialsProvider.h>
# include <aws/core/auth/AWSCredentialsProviderChain.h>
# include <aws/core/auth/STSCredentialsProvider.h>
# include <aws/core/client/DefaultRetryStrategy.h>
# include <aws/core/platform/Environment.h>
# include <aws/core/platform/OSVersionInfo.h>
# include <aws/core/utils/json/JsonSerializer.h>
# include <aws/core/utils/logging/LogMacros.h>
# include <aws/core/utils/logging/LogSystemInterface.h>
# include <aws/core/utils/HashingUtils.h>
# include <aws/s3/S3Client.h>
# include <aws/core/http/HttpClientFactory.h>
# include <aws/s3/S3Client.h>
# include <IO/S3/PocoHTTPClientFactory.h>
# include <IO/S3/PocoHTTPClient.h>
# include <Poco/URI.h>
@ -91,28 +95,289 @@ private:
std::unordered_map<String, Poco::Logger *> tag_loggers;
};
class AWSEC2MetadataClient : public Aws::Internal::AWSHttpResourceClient
{
static constexpr char EC2_SECURITY_CREDENTIALS_RESOURCE[] = "/latest/meta-data/iam/security-credentials";
static constexpr char EC2_IMDS_TOKEN_RESOURCE[] = "/latest/api/token";
static constexpr char EC2_IMDS_TOKEN_HEADER[] = "x-aws-ec2-metadata-token";
static constexpr char EC2_IMDS_TOKEN_TTL_DEFAULT_VALUE[] = "21600";
static constexpr char EC2_IMDS_TOKEN_TTL_HEADER[] = "x-aws-ec2-metadata-token-ttl-seconds";
static constexpr char EC2_DEFAULT_METADATA_ENDPOINT[] = "http://169.254.169.254";
public:
/// See EC2MetadataClient.
explicit AWSEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
: Aws::Internal::AWSHttpResourceClient(client_configuration)
, logger(&Poco::Logger::get("AWSEC2InstanceProfileConfigLoader"))
{
}
AWSEC2MetadataClient& operator =(const AWSEC2MetadataClient & rhs) = delete;
AWSEC2MetadataClient(const AWSEC2MetadataClient & rhs) = delete;
AWSEC2MetadataClient& operator =(const AWSEC2MetadataClient && rhs) = delete;
AWSEC2MetadataClient(const AWSEC2MetadataClient && rhs) = delete;
virtual ~AWSEC2MetadataClient() override = default;
using Aws::Internal::AWSHttpResourceClient::GetResource;
virtual Aws::String GetResource(const char * resource_path) const
{
return GetResource(endpoint.c_str(), resource_path, nullptr/*authToken*/);
}
virtual Aws::String getDefaultCredentials() const
{
String credentials_string;
{
std::unique_lock<std::recursive_mutex> locker(token_mutex);
LOG_TRACE(logger, "Getting default credentials for EC2 instance.");
auto result = GetResourceWithAWSWebServiceResult(endpoint.c_str(), EC2_SECURITY_CREDENTIALS_RESOURCE, nullptr);
credentials_string = result.GetPayload();
if (result.GetResponseCode() == Aws::Http::HttpResponseCode::UNAUTHORIZED)
{
return {};
}
}
String trimmed_credentials_string = Aws::Utils::StringUtils::Trim(credentials_string.c_str());
if (trimmed_credentials_string.empty())
return {};
std::vector<String> security_credentials = Aws::Utils::StringUtils::Split(trimmed_credentials_string, '\n');
LOG_DEBUG(logger, "Calling EC2MetadataService resource, {} returned credential string {}.",
EC2_SECURITY_CREDENTIALS_RESOURCE, trimmed_credentials_string);
if (security_credentials.empty())
{
LOG_WARNING(logger, "Initial call to EC2MetadataService to get credentials failed.");
return {};
}
Aws::StringStream ss;
ss << EC2_SECURITY_CREDENTIALS_RESOURCE << "/" << security_credentials[0];
LOG_DEBUG(logger, "Calling EC2MetadataService resource {}.", ss.str());
return GetResource(ss.str().c_str());
}
static Aws::String awsComputeUserAgentString()
{
Aws::StringStream ss;
ss << "aws-sdk-cpp/" << Aws::Version::GetVersionString() << " " << Aws::OSVersionInfo::ComputeOSVersionString()
<< " " << Aws::Version::GetCompilerVersionString();
return ss.str();
}
virtual Aws::String getDefaultCredentialsSecurely() const
{
String user_agent_string = awsComputeUserAgentString();
String new_token;
{
std::unique_lock<std::recursive_mutex> locker(token_mutex);
Aws::StringStream ss;
ss << endpoint << EC2_IMDS_TOKEN_RESOURCE;
std::shared_ptr<Aws::Http::HttpRequest> token_request(Aws::Http::CreateHttpRequest(ss.str(), Aws::Http::HttpMethod::HTTP_PUT,
Aws::Utils::Stream::DefaultResponseStreamFactoryMethod));
token_request->SetHeaderValue(EC2_IMDS_TOKEN_TTL_HEADER, EC2_IMDS_TOKEN_TTL_DEFAULT_VALUE);
token_request->SetUserAgent(user_agent_string);
LOG_TRACE(logger, "Calling EC2MetadataService to get token.");
auto result = GetResourceWithAWSWebServiceResult(token_request);
const String & token_string = result.GetPayload();
new_token = Aws::Utils::StringUtils::Trim(token_string.c_str());
if (result.GetResponseCode() == Aws::Http::HttpResponseCode::BAD_REQUEST)
{
return {};
}
else if (result.GetResponseCode() != Aws::Http::HttpResponseCode::OK || new_token.empty())
{
LOG_TRACE(logger, "Calling EC2MetadataService to get token failed, falling back to less secure way.");
return getDefaultCredentials();
}
token = new_token;
}
String url = endpoint + EC2_SECURITY_CREDENTIALS_RESOURCE;
std::shared_ptr<Aws::Http::HttpRequest> profile_request(Aws::Http::CreateHttpRequest(url,
Aws::Http::HttpMethod::HTTP_GET,
Aws::Utils::Stream::DefaultResponseStreamFactoryMethod));
profile_request->SetHeaderValue(EC2_IMDS_TOKEN_HEADER, new_token);
profile_request->SetUserAgent(user_agent_string);
String profile_string = GetResourceWithAWSWebServiceResult(profile_request).GetPayload();
String trimmed_profile_string = Aws::Utils::StringUtils::Trim(profile_string.c_str());
std::vector<String> security_credentials = Aws::Utils::StringUtils::Split(trimmed_profile_string, '\n');
LOG_DEBUG(logger, "Calling EC2MetadataService resource, {} with token returned profile string {}.",
EC2_SECURITY_CREDENTIALS_RESOURCE, trimmed_profile_string);
if (security_credentials.empty())
{
LOG_WARNING(logger, "Calling EC2Metadataservice to get profiles failed.");
return {};
}
Aws::StringStream ss;
ss << endpoint << EC2_SECURITY_CREDENTIALS_RESOURCE << "/" << security_credentials[0];
std::shared_ptr<Aws::Http::HttpRequest> credentials_request(Aws::Http::CreateHttpRequest(ss.str(),
Aws::Http::HttpMethod::HTTP_GET,
Aws::Utils::Stream::DefaultResponseStreamFactoryMethod));
credentials_request->SetHeaderValue(EC2_IMDS_TOKEN_HEADER, new_token);
credentials_request->SetUserAgent(user_agent_string);
LOG_DEBUG(logger, "Calling EC2MetadataService resource {} with token.", ss.str());
return GetResourceWithAWSWebServiceResult(credentials_request).GetPayload();
}
virtual Aws::String getCurrentRegion() const
{
return Aws::Region::AWS_GLOBAL;
}
private:
const Aws::String endpoint = EC2_DEFAULT_METADATA_ENDPOINT;
mutable std::recursive_mutex token_mutex;
mutable Aws::String token;
Poco::Logger * logger;
};
class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader
{
public:
explicit AWSEC2InstanceProfileConfigLoader(const std::shared_ptr<AWSEC2MetadataClient> & client_, bool use_secure_pull_)
: client(client_)
, use_secure_pull(use_secure_pull_)
, logger(&Poco::Logger::get("AWSEC2InstanceProfileConfigLoader"))
{
}
virtual ~AWSEC2InstanceProfileConfigLoader() override = default;
protected:
virtual bool LoadInternal() override
{
auto credentials_str = use_secure_pull ? client->getDefaultCredentialsSecurely() : client->getDefaultCredentials();
/// See EC2InstanceProfileConfigLoader.
if (credentials_str.empty())
return false;
Aws::Utils::Json::JsonValue credentials_doc(credentials_str);
if (!credentials_doc.WasParseSuccessful())
{
LOG_ERROR(logger, "Failed to parse output from EC2MetadataService.");
return false;
}
String access_key, secret_key, token;
auto credentials_view = credentials_doc.View();
access_key = credentials_view.GetString("AccessKeyId");
LOG_ERROR(logger, "Successfully pulled credentials from EC2MetadataService with access key {}.", access_key);
secret_key = credentials_view.GetString("SecretAccessKey");
token = credentials_view.GetString("Token");
auto region = client->getCurrentRegion();
Aws::Config::Profile profile;
profile.SetCredentials(Aws::Auth::AWSCredentials(access_key, secret_key, token));
profile.SetRegion(region);
profile.SetName(Aws::Config::INSTANCE_PROFILE_KEY);
m_profiles[Aws::Config::INSTANCE_PROFILE_KEY] = profile;
return true;
}
private:
std::shared_ptr<AWSEC2MetadataClient> client;
bool use_secure_pull;
Poco::Logger * logger;
};
class AWSInstanceProfileCredentialsProvider : public Aws::Auth::AWSCredentialsProvider
{
public:
/// See InstanceProfileCredentialsProvider.
explicit AWSInstanceProfileCredentialsProvider(const std::shared_ptr<AWSEC2InstanceProfileConfigLoader> & config_loader)
: ec2_metadata_config_loader(config_loader)
, load_frequency_ms(Aws::Auth::REFRESH_THRESHOLD)
, logger(&Poco::Logger::get("AWSInstanceProfileCredentialsProvider"))
{
LOG_INFO(logger, "Creating Instance with injected EC2MetadataClient and refresh rate {}.");
}
Aws::Auth::AWSCredentials GetAWSCredentials() override
{
refreshIfExpired();
Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock);
auto profile_it = ec2_metadata_config_loader->GetProfiles().find(Aws::Config::INSTANCE_PROFILE_KEY);
if (profile_it != ec2_metadata_config_loader->GetProfiles().end())
{
return profile_it->second.GetCredentials();
}
return Aws::Auth::AWSCredentials();
}
protected:
void Reload() override
{
LOG_INFO(logger, "Credentials have expired attempting to repull from EC2 Metadata Service.");
ec2_metadata_config_loader->Load();
AWSCredentialsProvider::Reload();
}
private:
void refreshIfExpired()
{
LOG_DEBUG(logger, "Checking if latest credential pull has expired.");
Aws::Utils::Threading::ReaderLockGuard guard(m_reloadLock);
if (!IsTimeToRefresh(load_frequency_ms))
{
return;
}
guard.UpgradeToWriterLock();
if (!IsTimeToRefresh(load_frequency_ms)) // double-checked lock to avoid refreshing twice
{
return;
}
Reload();
}
std::shared_ptr<AWSEC2InstanceProfileConfigLoader> ec2_metadata_config_loader;
Int64 load_frequency_ms;
Poco::Logger * logger;
};
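refreshIfExpired() above is the classic double-checked locking pattern: a cheap test under the reader lock, an upgrade to the writer lock, then a second test so that two threads racing past the first check do not both reload. A self-contained sketch of the same idea on std::shared_mutex (which lacks an upgrade operation, so the lock is simply re-acquired exclusively):

#include <chrono>
#include <iostream>
#include <mutex>
#include <shared_mutex>

class CachedValue
{
public:
    int get()
    {
        refreshIfExpired();
        std::shared_lock lock(mutex);
        return value;
    }

private:
    void refreshIfExpired()
    {
        {
            std::shared_lock lock(mutex);   /// Cheap read-side check first.
            if (!isExpired())
                return;
        }
        std::unique_lock lock(mutex);       /// std::shared_mutex has no upgrade; re-acquire exclusively.
        if (!isExpired())                   /// Double check: another thread may have refreshed already.
            return;
        ++value;                            /// Stand-in for the expensive reload.
        deadline = std::chrono::steady_clock::now() + std::chrono::seconds(5);
    }

    bool isExpired() const { return std::chrono::steady_clock::now() >= deadline; }

    std::shared_mutex mutex;
    int value = 0;
    std::chrono::steady_clock::time_point deadline;   /// Default-constructed: already expired.
};

int main()
{
    CachedValue cache;
    std::cout << cache.get() << '\n';   /// Prints 1: the first get() triggers exactly one refresh.
}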
class S3CredentialsProviderChain : public Aws::Auth::AWSCredentialsProviderChain
{
public:
explicit S3CredentialsProviderChain(const DB::S3::PocoHTTPClientConfiguration & configuration, const Aws::Auth::AWSCredentials & credentials, bool use_environment_credentials)
explicit S3CredentialsProviderChain(const DB::S3::PocoHTTPClientConfiguration & configuration, const Aws::Auth::AWSCredentials & credentials, bool use_environment_credentials, bool use_insecure_imds_request)
{
auto * logger = &Poco::Logger::get("S3CredentialsProviderChain");
if (use_environment_credentials)
{
const DB::RemoteHostFilter & remote_host_filter = configuration.remote_host_filter;
const unsigned int s3_max_redirects = configuration.s3_max_redirects;
static const char AWS_ECS_CONTAINER_CREDENTIALS_RELATIVE_URI[] = "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI";
static const char AWS_ECS_CONTAINER_CREDENTIALS_FULL_URI[] = "AWS_CONTAINER_CREDENTIALS_FULL_URI";
static const char AWS_ECS_CONTAINER_AUTHORIZATION_TOKEN[] = "AWS_CONTAINER_AUTHORIZATION_TOKEN";
static const char AWS_EC2_METADATA_DISABLED[] = "AWS_EC2_METADATA_DISABLED";
auto * logger = &Poco::Logger::get("S3CredentialsProviderChain");
/// The only difference from DefaultAWSCredentialsProviderChain::DefaultAWSCredentialsProviderChain()
/// is that this chain uses custom ClientConfiguration.
AddProvider(std::make_shared<Aws::Auth::EnvironmentAWSCredentialsProvider>());
AddProvider(std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>());
AddProvider(std::make_shared<Aws::Auth::ProcessCredentialsProvider>());
AddProvider(std::make_shared<Aws::Auth::STSAssumeRoleWebIdentityCredentialsProvider>());
/// ECS TaskRole credentials are only available when the corresponding environment variable is set.
@ -145,7 +410,7 @@ public:
}
else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true")
{
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(remote_host_filter, s3_max_redirects);
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.remote_host_filter, configuration.s3_max_redirects);
/// See MakeDefaultHttpResourceClientConfiguration().
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside
@ -163,13 +428,16 @@ public:
/// EC2MetadataService throttles by delaying the response, so the service client should set a large read timeout.
/// The EC2MetadataService delay is on the order of seconds, so it only makes sense to retry after a couple of seconds.
aws_client_configuration.connectTimeoutMs = 1000;
/// FIXME. Somehow this timeout does not work in docker without --net=host.
aws_client_configuration.requestTimeoutMs = 1000;
aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);
auto ec2_metadata_client = std::make_shared<Aws::Internal::EC2MetadataClient>(aws_client_configuration);
auto config_loader = std::make_shared<Aws::Config::EC2InstanceProfileConfigLoader>(ec2_metadata_client);
auto ec2_metadata_client = std::make_shared<AWSEC2MetadataClient>(aws_client_configuration);
auto config_loader = std::make_shared<AWSEC2InstanceProfileConfigLoader>(ec2_metadata_client, !use_insecure_imds_request);
AddProvider(std::make_shared<Aws::Auth::InstanceProfileCredentialsProvider>(config_loader));
AddProvider(std::make_shared<AWSInstanceProfileCredentialsProvider>(config_loader));
LOG_INFO(logger, "Added EC2 metadata service credentials provider to the provider chain.");
}
}
@ -185,12 +453,14 @@ public:
const Aws::Client::ClientConfiguration & client_configuration,
const Aws::Auth::AWSCredentials & credentials,
const DB::HeaderCollection & headers_,
bool use_environment_credentials)
bool use_environment_credentials,
bool use_insecure_imds_request)
: Aws::Client::AWSAuthV4Signer(
std::make_shared<S3CredentialsProviderChain>(
static_cast<const DB::S3::PocoHTTPClientConfiguration &>(client_configuration),
credentials,
use_environment_credentials),
use_environment_credentials,
use_insecure_imds_request),
"s3",
client_configuration.region,
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
@ -281,7 +551,8 @@ namespace S3
const String & secret_access_key,
const String & server_side_encryption_customer_key_base64,
HeaderCollection headers,
bool use_environment_credentials)
bool use_environment_credentials,
bool use_insecure_imds_request)
{
PocoHTTPClientConfiguration client_configuration = cfg_;
client_configuration.updateSchemeAndRegion();
@ -308,7 +579,8 @@ namespace S3
client_configuration,
std::move(credentials),
std::move(headers),
use_environment_credentials);
use_environment_credentials,
use_insecure_imds_request);
return std::make_shared<Aws::S3::S3Client>(
std::move(auth_signer),

View File

@ -38,7 +38,8 @@ public:
const String & secret_access_key,
const String & server_side_encryption_customer_key_base64,
HeaderCollection headers,
bool use_environment_credentials);
bool use_environment_credentials,
bool use_insecure_imds_request);
PocoHTTPClientConfiguration createClientConfiguration(
const RemoteHostFilter & remote_host_filter,

View File

@ -46,7 +46,7 @@ inline size_t readAlpha(char * res, size_t max_chars, ReadBuffer & in)
}
#if defined(__PPC__)
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#endif
@ -634,7 +634,7 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf
}
#if defined(__PPC__)
#if !__clang__
#if !defined(__clang__)
#pragma GCC diagnostic pop
#endif
#endif

View File

@ -129,6 +129,7 @@ struct QueryPlanSettings
{"header", query_plan_options.header},
{"description", query_plan_options.description},
{"actions", query_plan_options.actions},
{"indexes", query_plan_options.indexes},
{"optimize", optimize},
};
};

View File

@ -4,6 +4,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
namespace
@ -13,7 +14,7 @@ using namespace DB;
Field executeFunctionOnField(
const Field & field, const std::string & name,
const ExpressionActionsPtr & expr,
const ExpressionActionsPtr & sharding_expr,
const std::string & sharding_key_column_name)
{
DataTypePtr type = applyVisitor(FieldToDataType{}, field);
@ -25,17 +26,23 @@ Field executeFunctionOnField(
Block block{column};
size_t num_rows = 1;
expr->execute(block, num_rows);
sharding_expr->execute(block, num_rows);
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
return (*ret.column)[0];
}
/// Return true if shard may contain such value (or it is unknown), otherwise false.
/// @param sharding_column_value - one of values from IN
/// @param sharding_column_name - name of that column
/// @param sharding_expr - expression of sharding_key for the Distributed() table
/// @param sharding_key_column_name - name of the column for sharding_expr
/// @param shard_info - info for the current shard (to compare shard_num with calculated)
/// @param slots - weight -> shard mapping
/// @return true if shard may contain such value (or it is unknown), otherwise false.
bool shardContains(
const Field & sharding_column_value,
const std::string & sharding_column_name,
const ExpressionActionsPtr & expr,
const ExpressionActionsPtr & sharding_expr,
const std::string & sharding_key_column_name,
const Cluster::ShardInfo & shard_info,
const Cluster::SlotToShard & slots)
@ -45,7 +52,14 @@ bool shardContains(
if (sharding_column_value.isNull())
return false;
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name);
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, sharding_expr, sharding_key_column_name);
/// The value from IN can be non-numeric,
/// but in this case it should be convertible to a numeric type, so let's try.
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64());
/// If the conversion is not possible (the result is NULL), the shard cannot contain the value anyway.
if (sharding_value.isNull())
return false;
UInt64 value = sharding_value.get<UInt64>();
const auto shard_num = slots[value % slots.size()] + 1;
return shard_info.shard_num == shard_num;
@ -78,10 +92,10 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
if (!identifier)
return;
const auto & expr = data.sharding_key_expr;
const auto & sharding_expr = data.sharding_key_expr;
const auto & sharding_key_column_name = data.sharding_key_column_name;
if (!expr->getRequiredColumnsWithTypes().contains(identifier->name()))
if (!sharding_expr->getRequiredColumnsWithTypes().contains(identifier->name()))
return;
/// NOTE: we should not worry about the empty tuple,
@ -93,7 +107,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
std::erase_if(tuple_elements->children, [&](auto & child)
{
auto * literal = child->template as<ASTLiteral>();
return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
return literal && !shardContains(literal->value, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
else if (auto * tuple_literal = right->as<ASTLiteral>();
@ -102,7 +116,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
auto & tuple = tuple_literal->value.get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
return !shardContains(child, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
}
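The pruning in shardContains() reduces to one line of arithmetic: the sharding expression yields a UInt64, the value modulo the total weight selects a slot, and the slot holds a 0-based shard index that is incremented to match the 1-based shard_num. A toy sketch with a hypothetical weight layout:

#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    /// Hypothetical cluster: shard 1 has weight 2, shard 2 has weight 1.
    /// slots[] has one entry per unit of weight, holding the 0-based shard index.
    std::vector<size_t> slots = {0, 0, 1};

    for (uint64_t sharding_value : {0, 1, 2, 3, 4, 5})
    {
        /// Same formula as shardContains(): slot by modulo, then +1 for the 1-based shard_num.
        size_t shard_num = slots[sharding_value % slots.size()] + 1;
        std::cout << "value " << sharding_value << " -> shard " << shard_num << '\n';
    }
}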

View File

@ -12,8 +12,7 @@ namespace DB
static bool isUniq(const ASTFunction & func)
{
return func.name == "uniq" || func.name == "uniqExact" || func.name == "uniqHLL12"
|| func.name == "uniqCombined" || func.name == "uniqCombined64"
|| func.name == "uniqThetaSketch";
|| func.name == "uniqCombined" || func.name == "uniqCombined64";
}
/// Remove injective functions of one argument: replace with a child

View File

@ -26,6 +26,7 @@
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTLiteral.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
@ -181,8 +182,72 @@ struct CustomizeAggregateFunctionsMoveSuffixData
}
};
struct FuseSumCountAggregates
{
std::vector<ASTFunction *> sums {};
std::vector<ASTFunction *> counts {};
std::vector<ASTFunction *> avgs {};
void addFuncNode(ASTFunction * func)
{
if (func->name == "sum")
sums.push_back(func);
else if (func->name == "count")
counts.push_back(func);
else
{
assert(func->name == "avg");
avgs.push_back(func);
}
}
bool canBeFused() const
{
// Need at least two different kinds of functions to fuse.
if (sums.empty() && counts.empty())
return false;
if (sums.empty() && avgs.empty())
return false;
if (counts.empty() && avgs.empty())
return false;
return true;
}
};
struct FuseSumCountAggregatesVisitorData
{
using TypeToVisit = ASTFunction;
std::unordered_map<String, FuseSumCountAggregates> fuse_map;
void visit(ASTFunction & func, ASTPtr &)
{
if (func.name == "sum" || func.name == "avg" || func.name == "count")
{
if (func.arguments->children.empty())
return;
// Probably we can extend it to match count() for a non-nullable argument
// to sum/avg with any other argument. For now we require a strict match.
const auto argument = func.arguments->children.at(0)->getColumnName();
auto it = fuse_map.find(argument);
if (it != fuse_map.end())
{
it->second.addFuncNode(&func);
}
else
{
FuseSumCountAggregates funcs{};
funcs.addFuncNode(&func);
fuse_map[argument] = funcs;
}
}
}
};
using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsSuffixData>, true>;
using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor<OneTypeMatcher<CustomizeAggregateFunctionsMoveSuffixData>, true>;
using FuseSumCountAggregatesVisitor = InDepthNodeVisitor<OneTypeMatcher<FuseSumCountAggregatesVisitorData>, true>;
/// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form.
/// Expand asterisks and qualified asterisks with column names.
@ -200,6 +265,49 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED);
}
// Replaces a single avg/sum/count function with an appropriate expression using
// sumCount().
void replaceWithSumCount(String column_name, ASTFunction & func)
{
auto func_base = makeASTFunction("sumCount", std::make_shared<ASTIdentifier>(column_name));
auto exp_list = std::make_shared<ASTExpressionList>();
if (func.name == "sum" || func.name == "count")
{
/// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2
UInt8 idx = (func.name == "sum" ? 1 : 2);
func.name = "tupleElement";
exp_list->children.push_back(func_base);
exp_list->children.push_back(std::make_shared<ASTLiteral>(idx));
}
else
{
/// Rewrite "avg" to sumCount().1 / sumCount().2
auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(2)));
func.name = "divide";
exp_list->children.push_back(new_arg1);
exp_list->children.push_back(new_arg2);
}
func.arguments = exp_list;
func.children.push_back(func.arguments);
}
void fuseSumCountAggregates(std::unordered_map<String, FuseSumCountAggregates> & fuse_map)
{
for (auto & it : fuse_map)
{
if (it.second.canBeFused())
{
for (auto & func: it.second.sums)
replaceWithSumCount(it.first, *func);
for (auto & func: it.second.avgs)
replaceWithSumCount(it.first, *func);
for (auto & func: it.second.counts)
replaceWithSumCount(it.first, *func);
}
}
}
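To make the effect of the pass concrete: sum(x), count(x) and avg(x) in one query all end up referencing a single sumCount(x), which common subexpression elimination then computes once. A toy model of the grouping and rewrite, using strings in place of AST nodes:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

static std::string rewrite(const std::string & name, const std::string & arg)
{
    /// Same shape as replaceWithSumCount(): sum -> .1, count -> .2, avg -> .1 / .2.
    if (name == "sum")   return "tupleElement(sumCount(" + arg + "), 1)";
    if (name == "count") return "tupleElement(sumCount(" + arg + "), 2)";
    return "tupleElement(sumCount(" + arg + "), 1) / tupleElement(sumCount(" + arg + "), 2)";
}

int main()
{
    /// (function name, argument) pairs, standing in for ASTFunction nodes.
    std::vector<std::pair<std::string, std::string>> funcs =
        {{"sum", "x"}, {"count", "x"}, {"avg", "x"}, {"sum", "y"}};

    /// Group the kinds of functions by argument, as FuseSumCountAggregatesVisitor does.
    std::map<std::string, std::set<std::string>> kinds_by_arg;
    for (const auto & [name, arg] : funcs)
        kinds_by_arg[arg].insert(name);

    for (const auto & [name, arg] : funcs)
    {
        /// canBeFused(): at least two different kinds over the same argument.
        if (kinds_by_arg[arg].size() >= 2)
            std::cout << name << "(" << arg << ") -> " << rewrite(name, arg) << '\n';
        else
            std::cout << name << "(" << arg << ") kept as is\n";
    }
}

Note that sum(y) stays untouched: only one kind of function references y, so there is nothing to share.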
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = ast->as<ASTFunction>())
@ -910,7 +1018,18 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const NameSet &
CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query);
}
// Rewrite all aggregate functions to add -OrNull suffix to them
// Try to fuse sum/avg/count with identical arguments into one sumCount() call,
// if we have at least two different functions. E.g. we will replace sum(x)
// and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will
// be calculated only once because of CSE.
if (settings.optimize_fuse_sum_count_avg)
{
FuseSumCountAggregatesVisitor::Data data;
FuseSumCountAggregatesVisitor(data).visit(query);
fuseSumCountAggregates(data.fuse_map);
}
/// Rewrite all aggregate functions to add -OrNull suffix to them
if (settings.aggregate_functions_null_for_empty)
{
CustomizeAggregateFunctionsOrNullVisitor::Data data_or_null{"OrNull"};

View File

@ -3,6 +3,7 @@
#include <Common/quoteString.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
#include <DataTypes/NumberTraits.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
@ -234,7 +235,11 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
* interpreted as a comment. Instead, negate the literal
* in place. Another possible solution is to use parentheses,
* but the old comment said it is impossible, without mentioning
* the reason.
* the reason. We should also negate the nonnegative literals,
* for symmetry. We print the negated value without parentheses,
* because they are not needed around a single literal. Also we
* use formatting from FieldVisitorToString, so that the type is
* preserved (e.g. -0. is printed with trailing period).
*/
if (literal && name == "negate")
{
@ -251,26 +256,18 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format
{
// The parser doesn't create decimal literals, but
// they can be produced by constant folding or the
// fuzzer.
// fuzzer. Decimals are always signed, so no need
// to deduce the result type like we do for ints.
const auto int_value = value.getValue().value;
// We compare to zero so we don't care about scale.
if (int_value >= 0)
{
return false;
}
settings.ostr << ValueType{-int_value,
value.getScale()};
settings.ostr << FieldVisitorToString{}(ValueType{
-int_value,
value.getScale()});
}
else if constexpr (std::is_arithmetic_v<ValueType>)
{
if (value >= 0)
{
return false;
}
// We don't need parentheses around a single
// literal.
settings.ostr << -value;
using ResultType = typename NumberTraits::ResultOfNegate<ValueType>::Type;
settings.ostr << FieldVisitorToString{}(
-static_cast<ResultType>(value));
return true;
}
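The cast through NumberTraits::ResultOfNegate is what keeps negation of boundary values representable: negating Int8(-128) within 8 bits cannot yield 128. A standalone sketch with a simplified widening trait in place of NumberTraits (an assumption for illustration; the real trait also handles wider and big-integer types):

#include <cstdint>
#include <iostream>
#include <type_traits>

/// Simplified stand-in for NumberTraits::ResultOfNegate: the next wider signed type.
template <typename T>
using ResultOfNegate = std::conditional_t<sizeof(T) == 1, int16_t,
                       std::conditional_t<sizeof(T) == 2, int32_t, int64_t>>;

template <typename T>
void printNegated(T value)
{
    using ResultType = ResultOfNegate<T>;
    /// Widen first, then negate: the mathematical result always fits.
    std::cout << -static_cast<ResultType>(value) << '\n';
}

int main()
{
    printNegated(int8_t{-128});    /// 128, not representable in Int8
    printNegated(int16_t{-32768}); /// 32768
    printNegated(uint32_t{5});     /// -5, negating an unsigned value stays correct
}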

View File

@ -19,6 +19,13 @@ namespace ErrorCodes
extern const int CANNOT_READ_ALL_DATA;
}
#define THROW_ARROW_NOT_OK(status) \
do \
{ \
if (::arrow::Status _s = (status); !_s.ok()) \
throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \
} while (false)
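The do { ... } while (false) wrapper makes THROW_ARROW_NOT_OK expand to a single statement, so the trailing semicolon attaches cleanly and the macro is safe in an unbraced if/else. A quick illustration of the pitfall it avoids, with simplified stand-in macros:

#define BAD_CHECK(ok) { if (!(ok)) throw 1; }
#define GOOD_CHECK(ok) do { if (!(ok)) throw 1; } while (false)

void example(bool ok)
{
    // if (ok) BAD_CHECK(ok); else (void)ok;   /// does not compile: the stray ';' after the block orphans the 'else'
    if (ok)
        GOOD_CHECK(ok);                        /// fine: expands to a single statement ending at ';'
    else
        (void)ok;
}

int main() { example(true); }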
ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_) : IInputFormat(std::move(header_), in_)
{
}
@ -28,21 +35,26 @@ Chunk ORCBlockInputFormat::generate()
Chunk res;
const Block & header = getPort().getHeader();
if (file_reader)
if (!file_reader)
prepareReader();
if (stripe_current >= stripe_total)
return res;
arrow::Status open_status = arrow::adapters::orc::ORCFileReader::Open(asArrowFile(in), arrow::default_memory_pool(), &file_reader);
if (!open_status.ok())
throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS);
std::shared_ptr<arrow::RecordBatch> batch_result;
arrow::Status batch_status = file_reader->ReadStripe(stripe_current, include_indices, &batch_result);
if (!batch_status.ok())
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while reading batch of ORC data: {}", batch_status.ToString());
std::shared_ptr<arrow::Table> table;
arrow::Status read_status = file_reader->Read(&table);
if (!read_status.ok())
throw ParsingException{"Error while reading ORC data: " + read_status.ToString(),
ErrorCodes::CANNOT_READ_ALL_DATA};
auto table_result = arrow::Table::FromRecordBatches({batch_result});
if (!table_result.ok())
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA,
"Error while reading batch of ORC data: {}", table_result.status().ToString());
ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "ORC");
++stripe_current;
ArrowColumnToCHColumn::arrowTableToCHChunk(res, *table_result, header, "ORC");
return res;
}
@ -51,6 +63,26 @@ void ORCBlockInputFormat::resetParser()
IInputFormat::resetParser();
file_reader.reset();
include_indices.clear();
stripe_current = 0;
}
void ORCBlockInputFormat::prepareReader()
{
THROW_ARROW_NOT_OK(arrow::adapters::orc::ORCFileReader::Open(asArrowFile(in), arrow::default_memory_pool(), &file_reader));
stripe_total = file_reader->NumberOfStripes();
stripe_current = 0;
std::shared_ptr<arrow::Schema> schema;
THROW_ARROW_NOT_OK(file_reader->ReadSchema(&schema));
for (int i = 0; i < schema->num_fields(); ++i)
{
if (getPort().getHeader().has(schema->field(i)->name()))
{
include_indices.push_back(i+1);
}
}
}
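prepareReader() computes the column projection once: every schema field whose name appears in the query header is recorded, shifted by one because this reader's column numbering starts at 1 for the first field (an observation from this code, worth re-checking against the arrow version in use). The matching itself, sketched on plain name lists:

#include <iostream>
#include <set>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> schema_fields = {"id", "name", "payload", "ts"};
    std::set<std::string> header_columns = {"id", "ts"};   /// Columns the query actually needs.

    std::vector<int> include_indices;
    for (size_t i = 0; i < schema_fields.size(); ++i)
        if (header_columns.count(schema_fields[i]))
            include_indices.push_back(static_cast<int>(i) + 1);   /// 1-based, matching the reader's numbering.

    for (int idx : include_indices)
        std::cout << idx << '\n';   /// Prints 1 and 4.
}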
void registerInputFormatProcessorORC(FormatFactory &factory)

View File

@ -25,6 +25,15 @@ private:
// TODO: check that this class implements every part of its parent
std::unique_ptr<arrow::adapters::orc::ORCFileReader> file_reader;
int stripe_total = 0;
int stripe_current = 0;
// indices of columns to read from ORC file
std::vector<int> include_indices;
void prepareReader();
};
}

View File

@ -99,6 +99,9 @@ public:
/// Get detailed description of step actions. This is shown in EXPLAIN query with options `actions = 1`.
virtual void describeActions(FormatSettings & /*settings*/) const {}
/// Get detailed description of read-from-storage step indexes (if any). Shown in EXPLAIN query with options `indexes = 1`.
virtual void describeIndexes(FormatSettings & /*settings*/) const {}
/// Get description of processors added in current step. Should be called after updatePipeline().
virtual void describePipeline(FormatSettings & /*settings*/) const {}

View File

@ -243,6 +243,9 @@ static void explainStep(
if (options.actions)
step.describeActions(settings);
if (options.indexes)
step.describeIndexes(settings);
}
std::string debugExplainStep(const IQueryPlanStep & step)

View File

@ -66,6 +66,8 @@ public:
bool description = true;
/// Add detailed information about step actions.
bool actions = false;
/// Add information about indexes actions.
bool indexes = false;
};
struct ExplainPipelineOptions

View File

@ -0,0 +1,249 @@
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Processors/QueryPipeline.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/Transforms/ReverseTransform.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
#include <Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h>
#include <common/logger_useful.h>
namespace DB
{
ReadFromMergeTree::ReadFromMergeTree(
const MergeTreeData & storage_,
StorageMetadataPtr metadata_snapshot_,
String query_id_,
Names required_columns_,
RangesInDataParts parts_,
IndexStatPtr index_stats_,
PrewhereInfoPtr prewhere_info_,
Names virt_column_names_,
Settings settings_,
size_t num_streams_,
ReadType read_type_)
: ISourceStep(DataStream{.header = MergeTreeBaseSelectProcessor::transformHeader(
metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()),
prewhere_info_,
virt_column_names_)})
, storage(storage_)
, metadata_snapshot(std::move(metadata_snapshot_))
, query_id(std::move(query_id_))
, required_columns(std::move(required_columns_))
, parts(std::move(parts_))
, index_stats(std::move(index_stats_))
, prewhere_info(std::move(prewhere_info_))
, virt_column_names(std::move(virt_column_names_))
, settings(std::move(settings_))
, num_streams(num_streams_)
, read_type(read_type_)
{
}
Pipe ReadFromMergeTree::readFromPool()
{
Pipes pipes;
size_t sum_marks = 0;
size_t total_rows = 0;
for (const auto & part : parts)
{
sum_marks += part.getMarksCount();
total_rows += part.getRowsCount();
}
auto pool = std::make_shared<MergeTreeReadPool>(
num_streams,
sum_marks,
settings.min_marks_for_concurrent_read,
std::move(parts),
storage,
metadata_snapshot,
prewhere_info,
true,
required_columns,
settings.backoff_settings,
settings.preferred_block_size_bytes,
false);
auto * logger = &Poco::Logger::get(storage.getLogName() + " (SelectExecutor)");
LOG_DEBUG(logger, "Reading approx. {} rows with {} streams", total_rows, num_streams);
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
i, pool, settings.min_marks_for_concurrent_read, settings.max_block_size,
settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
storage, metadata_snapshot, settings.use_uncompressed_cache,
prewhere_info, settings.reader_settings, virt_column_names);
if (i == 0)
{
/// Set the approximate number of rows for the first source only
source->addTotalRowsApprox(total_rows);
}
pipes.emplace_back(std::move(source));
}
return Pipe::unitePipes(std::move(pipes));
}
template<typename TSource>
ProcessorPtr ReadFromMergeTree::createSource(const RangesInDataPart & part)
{
return std::make_shared<TSource>(
storage, metadata_snapshot, part.data_part, settings.max_block_size, settings.preferred_block_size_bytes,
settings.preferred_max_column_in_block_size_bytes, required_columns, part.ranges, settings.use_uncompressed_cache,
prewhere_info, true, settings.reader_settings, virt_column_names, part.part_index_in_query);
}
Pipe ReadFromMergeTree::readInOrder()
{
Pipes pipes;
for (const auto & part : parts)
{
auto source = read_type == ReadType::InReverseOrder
? createSource<MergeTreeReverseSelectProcessor>(part)
: createSource<MergeTreeSelectProcessor>(part);
pipes.emplace_back(std::move(source));
}
auto pipe = Pipe::unitePipes(std::move(pipes));
if (read_type == ReadType::InReverseOrder)
{
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ReverseTransform>(header);
});
}
return pipe;
}
Pipe ReadFromMergeTree::read()
{
if (read_type == ReadType::Default && num_streams > 1)
return readFromPool();
auto pipe = readInOrder();
/// Use ConcatProcessor to concatenate the sources.
/// It is needed to read in parts order (and so in PK order) if a single thread is used.
if (read_type == ReadType::Default && pipe.numOutputPorts() > 1)
pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts()));
return pipe;
}
void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
Pipe pipe = read();
for (const auto & processor : pipe.getProcessors())
processors.emplace_back(processor);
// Attach QueryIdHolder if needed
if (!query_id.empty())
pipe.addQueryIdHolder(std::make_shared<QueryIdHolder>(query_id, storage));
pipeline.init(std::move(pipe));
}
static const char * indexTypeToString(ReadFromMergeTree::IndexType type)
{
switch (type)
{
case ReadFromMergeTree::IndexType::None:
return "None";
case ReadFromMergeTree::IndexType::MinMax:
return "MinMax";
case ReadFromMergeTree::IndexType::Partition:
return "Partition";
case ReadFromMergeTree::IndexType::PrimaryKey:
return "PrimaryKey";
case ReadFromMergeTree::IndexType::Skip:
return "Skip";
}
__builtin_unreachable();
}
static const char * readTypeToString(ReadFromMergeTree::ReadType type)
{
switch (type)
{
case ReadFromMergeTree::ReadType::Default:
return "Default";
case ReadFromMergeTree::ReadType::InOrder:
return "InOrder";
case ReadFromMergeTree::ReadType::InReverseOrder:
return "InReverseOrder";
}
__builtin_unreachable();
}
void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const
{
std::string prefix(format_settings.offset, format_settings.indent_char);
format_settings.out << prefix << "ReadType: " << readTypeToString(read_type) << '\n';
if (index_stats && !index_stats->empty())
{
format_settings.out << prefix << "Parts: " << index_stats->back().num_parts_after << '\n';
format_settings.out << prefix << "Granules: " << index_stats->back().num_granules_after << '\n';
}
}
void ReadFromMergeTree::describeIndexes(FormatSettings & format_settings) const
{
std::string prefix(format_settings.offset, format_settings.indent_char);
if (index_stats && !index_stats->empty())
{
std::string indent(format_settings.indent, format_settings.indent_char);
/// Do not print anything if no indexes are applied.
if (index_stats->size() > 1 || index_stats->front().type != IndexType::None)
format_settings.out << prefix << "Indexes:\n";
for (size_t i = 0; i < index_stats->size(); ++i)
{
const auto & stat = (*index_stats)[i];
if (stat.type == IndexType::None)
continue;
format_settings.out << prefix << indent << indexTypeToString(stat.type) << '\n';
if (!stat.name.empty())
format_settings.out << prefix << indent << indent << "Name: " << stat.name << '\n';
if (!stat.description.empty())
format_settings.out << prefix << indent << indent << "Description: " << stat.description << '\n';
if (!stat.used_keys.empty())
{
format_settings.out << prefix << indent << indent << "Keys: " << stat.name << '\n';
for (const auto & used_key : stat.used_keys)
format_settings.out << prefix << indent << indent << indent << used_key << '\n';
}
if (!stat.condition.empty())
format_settings.out << prefix << indent << indent << "Condition: " << stat.condition << '\n';
format_settings.out << prefix << indent << indent << "Parts: " << stat.num_parts_after;
if (i)
format_settings.out << '/' << (*index_stats)[i - 1].num_parts_after;
format_settings.out << '\n';
format_settings.out << prefix << indent << indent << "Granules: " << stat.num_granules_after;
if (i)
format_settings.out << '/' << (*index_stats)[i - 1].num_granules_after;
format_settings.out << '\n';
}
}
}
}

View File

@ -0,0 +1,113 @@
#pragma once
#include <Processors/QueryPlan/ISourceStep.h>
#include <Processors/Pipe.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
namespace DB
{
/// This step is created to read from a MergeTree* table.
/// For now, it takes a list of parts and creates a source from it.
class ReadFromMergeTree final : public ISourceStep
{
public:
enum class IndexType
{
None,
MinMax,
Partition,
PrimaryKey,
Skip,
};
/// This is a struct with information about applied indexes.
/// It is used for introspection only, in the EXPLAIN query.
struct IndexStat
{
IndexType type;
std::string name;
std::string description;
std::string condition;
std::vector<std::string> used_keys;
size_t num_parts_after;
size_t num_granules_after;
};
using IndexStats = std::vector<IndexStat>;
using IndexStatPtr = std::unique_ptr<IndexStats>;
/// Part of settings which are needed for reading.
struct Settings
{
UInt64 max_block_size;
size_t preferred_block_size_bytes;
size_t preferred_max_column_in_block_size_bytes;
size_t min_marks_for_concurrent_read;
bool use_uncompressed_cache;
MergeTreeReaderSettings reader_settings;
MergeTreeReadPool::BackoffSettings backoff_settings;
};
enum class ReadType
{
/// By default, reading will use MergeTreeReadPool and return a pipe with num_streams outputs.
/// If num_streams == 1, it will read without a pool, in the order specified in parts.
Default,
/// Read in sorting key order.
/// The returned pipe will have a number of ports equal to parts.size().
/// The parameter num_streams_ is ignored in this case.
/// The user should add MergingSorted manually if needed.
InOrder,
/// The same as InOrder, but in reverse order.
/// The same as InOrder, but for every part, read ranges and granules from end to beginning. A ReverseTransform is also added.
InReverseOrder,
};
ReadFromMergeTree(
const MergeTreeData & storage_,
StorageMetadataPtr metadata_snapshot_,
String query_id_,
Names required_columns_,
RangesInDataParts parts_,
IndexStatPtr index_stats_,
PrewhereInfoPtr prewhere_info_,
Names virt_column_names_,
Settings settings_,
size_t num_streams_,
ReadType read_type_
);
String getName() const override { return "ReadFromMergeTree"; }
void initializePipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override;
void describeActions(FormatSettings & format_settings) const override;
void describeIndexes(FormatSettings & format_settings) const override;
private:
const MergeTreeData & storage;
StorageMetadataPtr metadata_snapshot;
String query_id;
Names required_columns;
RangesInDataParts parts;
IndexStatPtr index_stats;
PrewhereInfoPtr prewhere_info;
Names virt_column_names;
Settings settings;
size_t num_streams;
ReadType read_type;
Pipe read();
Pipe readFromPool();
Pipe readInOrder();
template<typename TSource>
ProcessorPtr createSource(const RangesInDataPart & part);
};
}

View File

@ -1,37 +0,0 @@
#include <Processors/QueryPlan/ReverseRowsStep.h>
#include <Processors/QueryPipeline.h>
#include <Processors/Transforms/ReverseTransform.h>
namespace DB
{
static ITransformingStep::Traits getTraits()
{
return ITransformingStep::Traits
{
{
.preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = false,
},
{
.preserves_number_of_rows = true,
}
};
}
ReverseRowsStep::ReverseRowsStep(const DataStream & input_stream_)
: ITransformingStep(input_stream_, input_stream_.header, getTraits())
{
}
void ReverseRowsStep::transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &)
{
pipeline.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ReverseTransform>(header);
});
}
}

View File

@ -1,18 +0,0 @@
#pragma once
#include <Processors/QueryPlan/ITransformingStep.h>
namespace DB
{
/// Reverse rows in chunk.
class ReverseRowsStep : public ITransformingStep
{
public:
explicit ReverseRowsStep(const DataStream & input_stream_);
String getName() const override { return "ReverseRows"; }
void transformPipeline(QueryPipeline & pipeline, const BuildQueryPipelineSettings &) override;
};
}

View File

@ -124,9 +124,9 @@ SRCS(
QueryPlan/PartialSortingStep.cpp
QueryPlan/QueryIdHolder.cpp
QueryPlan/QueryPlan.cpp
QueryPlan/ReadFromMergeTree.cpp
QueryPlan/ReadFromPreparedSource.cpp
QueryPlan/ReadNothingStep.cpp
QueryPlan/ReverseRowsStep.cpp
QueryPlan/RollupStep.cpp
QueryPlan/SettingQuotaAndLimitsStep.cpp
QueryPlan/TotalsHavingStep.cpp

View File

@ -158,6 +158,8 @@ void TCPHandler::runImpl()
}
Settings connection_settings = connection_context->getSettings();
UInt64 idle_connection_timeout = connection_settings.idle_connection_timeout;
UInt64 poll_interval = connection_settings.poll_interval;
sendHello();
@ -168,10 +170,10 @@ void TCPHandler::runImpl()
/// We are waiting for a packet from the client. Thus, every `poll_interval` seconds check whether we need to shut down.
{
Stopwatch idle_time;
while (!server.isCancelled() && !static_cast<ReadBufferFromPocoSocket &>(*in).poll(
std::min(connection_settings.poll_interval, connection_settings.idle_connection_timeout) * 1000000))
UInt64 timeout_ms = std::min(poll_interval, idle_connection_timeout) * 1000000;
while (!server.isCancelled() && !static_cast<ReadBufferFromPocoSocket &>(*in).poll(timeout_ms))
{
if (idle_time.elapsedSeconds() > connection_settings.idle_connection_timeout)
if (idle_time.elapsedSeconds() > idle_connection_timeout)
{
LOG_TRACE(log, "Closing idle connection");
return;
@ -212,6 +214,15 @@ void TCPHandler::runImpl()
if (!receivePacket())
continue;
/** If a Query was received, then the settings in query_context have been updated.
* So, update some other connection settings, for flexibility.
*/
{
const Settings & settings = query_context->getSettingsRef();
idle_connection_timeout = settings.idle_connection_timeout;
poll_interval = settings.poll_interval;
}
/** If part_uuids were received in the previous packet, try to read again.
*/
if (state.empty() && state.part_uuids && !receivePacket())
@ -274,10 +285,10 @@ void TCPHandler::runImpl()
if (context != query_context)
throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR);
size_t poll_interval;
size_t poll_interval_ms;
int receive_timeout;
std::tie(poll_interval, receive_timeout) = getReadTimeouts(connection_settings);
if (!readDataNext(poll_interval, receive_timeout))
std::tie(poll_interval_ms, receive_timeout) = getReadTimeouts(connection_settings);
if (!readDataNext(poll_interval_ms, receive_timeout))
{
state.block_in.reset();
state.maybe_compressed_in.reset();
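The idle-connection loop above bounds each poll() by min(poll_interval, idle_connection_timeout), converted from seconds to microseconds, while a Stopwatch tracks total idle time; that way cancellation is noticed at poll granularity and the idle limit is enforced independently. A self-contained sketch of the same shape, with a stubbed poll in place of the socket:

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

/// Stub: pretend no data ever arrives within the timeout.
static bool pollForData(uint64_t timeout_us)
{
    std::this_thread::sleep_for(std::chrono::microseconds(timeout_us));
    return false;
}

int main()
{
    const uint64_t poll_interval_s = 1;
    const uint64_t idle_connection_timeout_s = 3;

    /// Same conversion as in runImpl(): seconds -> microseconds, bounded by both settings.
    const uint64_t timeout_us = std::min(poll_interval_s, idle_connection_timeout_s) * 1000000;

    auto start = std::chrono::steady_clock::now();
    while (!pollForData(timeout_us))
    {
        double idle_s = std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count();
        if (idle_s > idle_connection_timeout_s)
        {
            std::cout << "Closing idle connection\n";
            return 0;
        }
    }
}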

View File

@ -1107,13 +1107,13 @@ void IMergeTreeDataPart::remove(bool keep_s3) const
{
/// Remove each expected file in directory, then remove directory itself.
#if !__clang__
#if !defined(__clang__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-variable"
#endif
for (const auto & [file, _] : checksums.files)
volume->getDisk()->removeSharedFile(to + "/" + file, keep_s3);
#if !__clang__
#if !defined(__clang__)
# pragma GCC diagnostic pop
#endif

View File

@ -938,6 +938,9 @@ public:
return func->getMonotonicityForRange(type, left, right);
}
Kind getKind() const { return kind; }
const ColumnWithTypeAndName & getConstArg() const { return const_arg; }
private:
FunctionBasePtr func;
ColumnWithTypeAndName const_arg;
@ -1308,6 +1311,235 @@ String KeyCondition::toString() const
return res;
}
KeyCondition::Description KeyCondition::getDescription() const
{
/// This code may seem too complicated.
/// Here we want to convert the RPN back to a tree, and also simplify some logical expressions like `and(x, true) -> x`.
Description description;
/// That's a binary tree. Explicit.
/// Build and optimize it simultaneously.
struct Node
{
enum class Type
{
/// Leaf, which is RPNElement.
Leaf,
/// Leaves, which are logical constants.
True,
False,
/// Binary operators.
And,
Or,
};
Type type;
/// Only for Leaf
const RPNElement * element = nullptr;
/// This means that logical NOT is applied to leaf.
bool negate = false;
std::unique_ptr<Node> left = nullptr;
std::unique_ptr<Node> right = nullptr;
};
/// The algorithm is the same as in KeyCondition::checkInHyperrectangle.
/// We build a pair of trees on the stack: one for checking whether the key condition may be true, and one for whether it may be false.
/// We need only `can_be_true` in the result.
struct Frame
{
std::unique_ptr<Node> can_be_true;
std::unique_ptr<Node> can_be_false;
};
/// Combine two subtrees using logical operator.
auto combine = [](std::unique_ptr<Node> left, std::unique_ptr<Node> right, Node::Type type)
{
/// Simplify operators when one operand is a constant condition.
if (type == Node::Type::And)
{
/// false AND right
if (left->type == Node::Type::False)
return left;
/// left AND false
if (right->type == Node::Type::False)
return right;
/// true AND right
if (left->type == Node::Type::True)
return right;
/// left AND true
if (right->type == Node::Type::True)
return left;
}
if (type == Node::Type::Or)
{
/// false OR right
if (left->type == Node::Type::False)
return right;
/// left OR false
if (right->type == Node::Type::False)
return left;
/// true OR right
if (left->type == Node::Type::True)
return left;
/// left OR true
if (right->type == Node::Type::True)
return right;
}
return std::make_unique<Node>(Node{
.type = type,
.left = std::move(left),
.right = std::move(right)
});
};
std::vector<Frame> rpn_stack;
for (const auto & element : rpn)
{
if (element.function == RPNElement::FUNCTION_UNKNOWN)
{
auto can_be_true = std::make_unique<Node>(Node{.type = Node::Type::True});
auto can_be_false = std::make_unique<Node>(Node{.type = Node::Type::True});
rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
}
else if (
element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_NOT_IN_RANGE
|| element.function == RPNElement::FUNCTION_IN_SET
|| element.function == RPNElement::FUNCTION_NOT_IN_SET)
{
auto can_be_true = std::make_unique<Node>(Node{.type = Node::Type::Leaf, .element = &element, .negate = false});
auto can_be_false = std::make_unique<Node>(Node{.type = Node::Type::Leaf, .element = &element, .negate = true});
rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
}
else if (element.function == RPNElement::FUNCTION_NOT)
{
assert(!rpn_stack.empty());
std::swap(rpn_stack.back().can_be_true, rpn_stack.back().can_be_false);
}
else if (element.function == RPNElement::FUNCTION_AND)
{
assert(!rpn_stack.empty());
auto arg1 = std::move(rpn_stack.back());
rpn_stack.pop_back();
assert(!rpn_stack.empty());
auto arg2 = std::move(rpn_stack.back());
Frame frame;
frame.can_be_true = combine(std::move(arg1.can_be_true), std::move(arg2.can_be_true), Node::Type::And);
frame.can_be_false = combine(std::move(arg1.can_be_false), std::move(arg2.can_be_false), Node::Type::Or);
rpn_stack.back() = std::move(frame);
}
else if (element.function == RPNElement::FUNCTION_OR)
{
assert(!rpn_stack.empty());
auto arg1 = std::move(rpn_stack.back());
rpn_stack.pop_back();
assert(!rpn_stack.empty());
auto arg2 = std::move(rpn_stack.back());
Frame frame;
frame.can_be_true = combine(std::move(arg1.can_be_true), std::move(arg2.can_be_true), Node::Type::Or);
frame.can_be_false = combine(std::move(arg1.can_be_false), std::move(arg2.can_be_false), Node::Type::And);
rpn_stack.back() = std::move(frame);
}
else if (element.function == RPNElement::ALWAYS_FALSE)
{
auto can_be_true = std::make_unique<Node>(Node{.type = Node::Type::False});
auto can_be_false = std::make_unique<Node>(Node{.type = Node::Type::True});
rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
}
else if (element.function == RPNElement::ALWAYS_TRUE)
{
auto can_be_true = std::make_unique<Node>(Node{.type = Node::Type::True});
auto can_be_false = std::make_unique<Node>(Node{.type = Node::Type::False});
rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)});
}
else
throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR);
}
if (rpn_stack.size() != 1)
throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR);
std::vector<std::string_view> key_names(key_columns.size());
std::vector<bool> is_key_used(key_columns.size(), false);
for (const auto & key : key_columns)
key_names[key.second] = key.first;
WriteBufferFromOwnString buf;
std::function<void(const Node *)> describe;
describe = [&describe, &key_names, &is_key_used, &buf](const Node * node)
{
switch (node->type)
{
case Node::Type::Leaf:
{
is_key_used[node->element->key_column] = true;
/// Note: for a condition with double negation, like `not(x not in set)`,
/// we could replace it with `x in set` here.
/// But we don't, because `cloneASTWithInversionPushDown` already pushes down `not`.
/// So this case seems to be impossible for the `can_be_true` tree.
if (node->negate)
buf << "not(";
buf << node->element->toString(key_names[node->element->key_column], true);
if (node->negate)
buf << ")";
break;
}
case Node::Type::True:
buf << "true";
break;
case Node::Type::False:
buf << "false";
break;
case Node::Type::And:
buf << "and(";
describe(node->left.get());
buf << ", ";
describe(node->right.get());
buf << ")";
break;
case Node::Type::Or:
buf << "or(";
describe(node->left.get());
buf << ", ";
describe(node->right.get());
buf << ")";
break;
}
};
describe(rpn_stack.front().can_be_true.get());
description.condition = std::move(buf.str());
for (size_t i = 0; i < key_names.size(); ++i)
if (is_key_used[i])
description.used_keys.emplace_back(key_names[i]);
return description;
}
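getDescription() walks the RPN exactly like checkInHyperrectangle, but instead of a BoolMask it builds a syntax tree and folds constants on the way: and(x, true) collapses to x, or(x, true) to true, and so on. A compact toy of the same reconstruction over string atoms, under the assumption that leaves arrive pre-rendered:

#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string text;   /// "true", "false", or a rendered leaf/operator.
    bool isTrue() const { return text == "true"; }
    bool isFalse() const { return text == "false"; }
};
using NodePtr = std::unique_ptr<Node>;

static NodePtr leaf(std::string s) { return std::make_unique<Node>(Node{std::move(s)}); }

/// Combine two subtrees with constant folding, mirroring the `combine` lambda above.
static NodePtr combine(NodePtr l, NodePtr r, const std::string & op)
{
    if (op == "and")
    {
        if (l->isFalse() || r->isTrue())  return l;   /// false AND x -> false; x AND true -> x
        if (r->isFalse() || l->isTrue())  return r;   /// x AND false -> false; true AND x -> x
    }
    else
    {
        if (l->isTrue() || r->isFalse())  return l;   /// true OR x -> true; x OR false -> x
        if (r->isTrue() || l->isFalse())  return r;   /// x OR true -> true; false OR x -> x
    }
    return leaf(op + "(" + l->text + ", " + r->text + ")");
}

int main()
{
    /// RPN for: (a in [1, 2]) AND unknown -- an unknown contributes "true" to can_be_true.
    std::vector<std::string> rpn = {"(a in [1, 2])", "true", "and"};

    std::vector<NodePtr> stack;
    for (const auto & tok : rpn)
    {
        if (tok == "and" || tok == "or")
        {
            assert(stack.size() >= 2);
            auto r = std::move(stack.back()); stack.pop_back();
            auto l = std::move(stack.back()); stack.pop_back();
            stack.push_back(combine(std::move(l), std::move(r), tok));
        }
        else
            stack.push_back(leaf(tok));
    }
    assert(stack.size() == 1);
    std::cout << stack.front()->text << '\n';   /// Prints: (a in [1, 2])
}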
/** Index is the value of key every `index_granularity` rows.
* This value is called a "mark". That is, the index consists of marks.
@ -1733,18 +1965,38 @@ bool KeyCondition::mayBeTrueAfter(
return checkInRange(used_key_size, left_key, nullptr, data_types, false, BoolMask::consider_only_can_be_true).can_be_true;
}
String KeyCondition::RPNElement::toString() const
String KeyCondition::RPNElement::toString() const { return toString("column " + std::to_string(key_column), false); }
String KeyCondition::RPNElement::toString(const std::string_view & column_name, bool print_constants) const
{
auto print_wrapped_column = [this](WriteBuffer & buf)
auto print_wrapped_column = [this, &column_name, print_constants](WriteBuffer & buf)
{
for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it)
{
buf << (*it)->getName() << "(";
if (print_constants)
{
if (const auto * func = typeid_cast<const FunctionWithOptionalConstArg *>(it->get()))
{
if (func->getKind() == FunctionWithOptionalConstArg::Kind::LEFT_CONST)
buf << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]) << ", ";
}
}
}
buf << "column " << key_column;
buf << column_name;
for (auto it = monotonic_functions_chain.rbegin(); it != monotonic_functions_chain.rend(); ++it)
{
if (print_constants)
{
if (const auto * func = typeid_cast<const FunctionWithOptionalConstArg *>(it->get()))
{
if (func->getKind() == FunctionWithOptionalConstArg::Kind::RIGHT_CONST)
buf << ", " << applyVisitor(FieldVisitorToString(), (*func->getConstArg().column)[0]);
}
}
buf << ")";
}
};
WriteBufferFromOwnString buf;

View File

@ -293,6 +293,16 @@ public:
String toString() const;
/// Condition description for EXPLAIN query.
struct Description
{
/// Which columns from PK were used, in PK order.
std::vector<std::string> used_keys;
/// Condition which was applied, mostly human-readable.
std::string condition;
};
Description getDescription() const;
/** A chain of possibly monotone functions.
* If the key column is wrapped in functions that can be monotonous in some value ranges
@ -345,6 +355,7 @@ private:
: function(function_), range(range_), key_column(key_column_) {}
String toString() const;
String toString(const std::string_view & column_name, bool print_constants) const;
Function function = FUNCTION_UNKNOWN;

View File

@ -30,7 +30,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor(
const MergeTreeReaderSettings & reader_settings_,
bool use_uncompressed_cache_,
const Names & virt_column_names_)
: SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_))
: SourceWithProgress(transformHeader(std::move(header), prewhere_info_, virt_column_names_))
, storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, prewhere_info(prewhere_info_)
@ -370,7 +370,7 @@ void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const P
}
}
Block MergeTreeBaseSelectProcessor::getHeader(
Block MergeTreeBaseSelectProcessor::transformHeader(
Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns)
{
executePrewhereActions(block, prewhere_info);

View File

@ -33,6 +33,8 @@ public:
~MergeTreeBaseSelectProcessor() override;
static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns);
static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info);
protected:
@ -49,8 +51,6 @@ protected:
static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns);
static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns);
static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns);
void initializeRangeReaders(MergeTreeReadTask & task);
protected:

View File

@ -28,7 +28,7 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/ReadFromPreparedSource.h>
#include <Processors/QueryPlan/ReverseRowsStep.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Processors/QueryPlan/MergingSortedStep.h>
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/MergingFinal.h>
@ -282,11 +282,40 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
auto query_context = context->hasQueryContext() ? context->getQueryContext() : context;
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context);
else
selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read);
PartFilterCounters part_filter_counters;
auto index_stats = std::make_unique<ReadFromMergeTree::IndexStats>();
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context, part_filter_counters);
else
selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, part_filter_counters);
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::None,
.num_parts_after = part_filter_counters.num_initial_selected_parts,
.num_granules_after = part_filter_counters.num_initial_selected_granules});
if (minmax_idx_condition)
{
auto description = minmax_idx_condition->getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::MinMax,
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = part_filter_counters.num_parts_after_minmax,
.num_granules_after = part_filter_counters.num_granules_after_minmax});
}
if (partition_pruner)
{
auto description = partition_pruner->getKeyCondition().getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::Partition,
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = part_filter_counters.num_parts_after_partition_pruner,
.num_granules_after = part_filter_counters.num_granules_after_partition_pruner});
}
/// Sampling.
Names column_names_to_read = real_column_names;
@ -568,6 +597,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
MergeTreeIndexConditionPtr condition;
std::atomic<size_t> total_granules{0};
std::atomic<size_t> granules_dropped{0};
std::atomic<size_t> total_parts{0};
std::atomic<size_t> parts_dropped{0};
DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_)
: index(index_)
@ -620,6 +651,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
RangesInDataParts parts_with_ranges(parts.size());
size_t sum_marks = 0;
std::atomic<size_t> sum_marks_pk = 0;
std::atomic<size_t> sum_parts_pk = 0;
std::atomic<size_t> total_marks_pk = 0;
size_t sum_ranges = 0;
@ -642,25 +674,29 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
RangesInDataPart ranges(part, part_index);
total_marks_pk.fetch_add(part->index_granularity.getMarksCount(), std::memory_order_relaxed);
size_t total_marks_count = part->getMarksCount();
if (total_marks_count && part->index_granularity.hasFinalMark())
--total_marks_count;
total_marks_pk.fetch_add(total_marks_count, std::memory_order_relaxed);
if (metadata_snapshot->hasPrimaryKey())
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
else
{
size_t total_marks_count = part->getMarksCount();
if (total_marks_count)
{
if (part->index_granularity.hasFinalMark())
--total_marks_count;
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
}
}
else if (total_marks_count)
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed);
if (!ranges.ranges.empty())
sum_parts_pk.fetch_add(1, std::memory_order_relaxed);
for (auto & index_and_condition : useful_indices)
{
if (ranges.ranges.empty())
break;
index_and_condition.total_parts.fetch_add(1, std::memory_order_relaxed);
size_t total_granules = 0;
size_t granules_dropped = 0;
ranges.ranges = filterMarksUsingIndex(
@ -672,6 +708,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
index_and_condition.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
index_and_condition.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
if (ranges.ranges.empty())
index_and_condition.parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
if (!ranges.ranges.empty())
@ -737,12 +776,34 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
parts_with_ranges.resize(next_part);
}
if (metadata_snapshot->hasPrimaryKey())
{
auto description = key_condition.getDescription();
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::PrimaryKey,
.condition = std::move(description.condition),
.used_keys = std::move(description.used_keys),
.num_parts_after = sum_parts_pk.load(std::memory_order_relaxed),
.num_granules_after = sum_marks_pk.load(std::memory_order_relaxed)});
}
for (const auto & index_and_condition : useful_indices)
{
const auto & index_name = index_and_condition.index->index.name;
LOG_DEBUG(log, "Index {} has dropped {}/{} granules.",
backQuote(index_name),
index_and_condition.granules_dropped, index_and_condition.total_granules);
std::string description = index_and_condition.index->index.type
+ " GRANULARITY " + std::to_string(index_and_condition.index->index.granularity);
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
.type = ReadFromMergeTree::IndexType::Skip,
.name = index_name,
.description = std::move(description),
.num_parts_after = index_and_condition.total_parts - index_and_condition.parts_dropped,
.num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped});
}
LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
@ -809,6 +870,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
plan = spreadMarkRangesAmongStreamsFinal(
std::move(parts_with_ranges),
std::move(index_stats),
num_streams,
column_names_to_read,
metadata_snapshot,
@ -832,6 +894,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
plan = spreadMarkRangesAmongStreamsWithOrder(
std::move(parts_with_ranges),
std::move(index_stats),
num_streams,
column_names_to_read,
metadata_snapshot,
@ -849,6 +912,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
{
plan = spreadMarkRangesAmongStreams(
std::move(parts_with_ranges),
std::move(index_stats),
num_streams,
column_names_to_read,
metadata_snapshot,
@ -960,25 +1024,9 @@ size_t minMarksForConcurrentRead(
}
static QueryPlanPtr createPlanFromPipe(Pipe pipe, const String & query_id, const MergeTreeData & data, const std::string & description = "")
{
auto plan = std::make_unique<QueryPlan>();
std::string storage_name = "MergeTree";
if (!description.empty())
storage_name += ' ' + description;
// Attach QueryIdHolder if needed
if (!query_id.empty())
pipe.addQueryIdHolder(std::make_shared<QueryIdHolder>(query_id, data));
auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), storage_name);
plan->addStep(std::move(step));
return plan;
}
QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -1030,75 +1078,32 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
if (0 == sum_marks)
return {};
ReadFromMergeTree::Settings step_settings
{
.max_block_size = max_block_size,
.preferred_block_size_bytes = settings.preferred_block_size_bytes,
.preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes,
.min_marks_for_concurrent_read = min_marks_for_concurrent_read,
.use_uncompressed_cache = use_uncompressed_cache,
.reader_settings = reader_settings,
.backoff_settings = MergeTreeReadPool::BackoffSettings(settings),
};
if (num_streams > 1)
{
/// Parallel query execution.
Pipes res;
/// Reduce num_streams if the data is small.
if (sum_marks < num_streams * min_marks_for_concurrent_read && parts.size() < num_streams)
num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size());
MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
num_streams,
sum_marks,
min_marks_for_concurrent_read,
std::move(parts),
data,
metadata_snapshot,
query_info.prewhere_info,
true,
column_names,
MergeTreeReadPool::BackoffSettings(settings),
settings.preferred_block_size_bytes,
false);
/// Let's estimate total number of rows for progress bar.
LOG_DEBUG(log, "Reading approx. {} rows with {} streams", total_rows, num_streams);
for (size_t i = 0; i < num_streams; ++i)
{
auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
i, pool, min_marks_for_concurrent_read, max_block_size,
settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
data, metadata_snapshot, use_uncompressed_cache,
query_info.prewhere_info, reader_settings, virt_columns);
if (i == 0)
{
/// Set the approximate number of rows for the first source only
source->addTotalRowsApprox(total_rows);
}
res.emplace_back(std::move(source));
}
return createPlanFromPipe(Pipe::unitePipes(std::move(res)), query_id, data);
}
else
{
/// Sequential query execution.
Pipes res;
for (const auto & part : parts)
{
auto source = std::make_shared<MergeTreeSelectProcessor>(
data, metadata_snapshot, part.data_part, max_block_size, settings.preferred_block_size_bytes,
settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache,
query_info.prewhere_info, true, reader_settings, virt_columns, part.part_index_in_query);
auto plan = std::make_unique<QueryPlan>();
auto step = std::make_unique<ReadFromMergeTree>(
data, metadata_snapshot, query_id,
column_names, std::move(parts), std::move(index_stats), query_info.prewhere_info, virt_columns,
step_settings, num_streams, ReadFromMergeTree::ReadType::Default);
res.emplace_back(std::move(source));
}
auto pipe = Pipe::unitePipes(std::move(res));
/// Use ConcatProcessor to concatenate the sources.
/// It is needed to read in the order of parts (and hence in PK order) when a single thread is used.
if (pipe.numOutputPorts() > 1)
pipe.addTransform(std::make_shared<ConcatProcessor>(pipe.getHeader(), pipe.numOutputPorts()));
return createPlanFromPipe(std::move(pipe), query_id, data);
}
plan->addStep(std::move(step));
return plan;
}
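/// With this refactoring, pipeline construction moves out of the spread* methods and into
/// the ReadFromMergeTree step itself, so all three of them reduce to the same pattern
/// (a minimal sketch based on the calls above; the parts and read_type arguments vary per method):
///
///     auto plan = std::make_unique<QueryPlan>();
///     plan->addStep(std::make_unique<ReadFromMergeTree>(
///         data, metadata_snapshot, query_id,
///         column_names, std::move(parts), std::move(index_stats),
///         query_info.prewhere_info, virt_columns,
///         step_settings, num_streams, read_type));
///     return plan;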
static ActionsDAGPtr createProjection(const Block & header)
@ -1111,6 +1116,7 @@ static ActionsDAGPtr createProjection(const Block & header)
QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -1218,8 +1224,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
for (size_t i = 0; i < num_streams && !parts.empty(); ++i)
{
size_t need_marks = min_marks_per_stream;
Pipes pipes;
RangesInDataParts new_parts;
/// Loop over parts.
/// We will iteratively take a part or some subrange of a part from the back
@ -1274,53 +1279,31 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
parts.emplace_back(part);
}
ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, input_order_info->direction);
if (input_order_info->direction == 1)
{
pipes.emplace_back(std::make_shared<MergeTreeSelectProcessor>(
data,
metadata_snapshot,
part.data_part,
max_block_size,
settings.preferred_block_size_bytes,
settings.preferred_max_column_in_block_size_bytes,
column_names,
ranges_to_get_from_part,
use_uncompressed_cache,
query_info.prewhere_info,
true,
reader_settings,
virt_columns,
part.part_index_in_query));
}
else
{
pipes.emplace_back(std::make_shared<MergeTreeReverseSelectProcessor>(
data,
metadata_snapshot,
part.data_part,
max_block_size,
settings.preferred_block_size_bytes,
settings.preferred_max_column_in_block_size_bytes,
column_names,
ranges_to_get_from_part,
use_uncompressed_cache,
query_info.prewhere_info,
true,
reader_settings,
virt_columns,
part.part_index_in_query));
}
new_parts.emplace_back(part.data_part, part.part_index_in_query, std::move(ranges_to_get_from_part));
}
auto plan = createPlanFromPipe(Pipe::unitePipes(std::move(pipes)), query_id, data, "with order");
if (input_order_info->direction != 1)
ReadFromMergeTree::Settings step_settings
{
auto reverse_step = std::make_unique<ReverseRowsStep>(plan->getCurrentDataStream());
plan->addStep(std::move(reverse_step));
}
.max_block_size = max_block_size,
.preferred_block_size_bytes = settings.preferred_block_size_bytes,
.preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes,
.min_marks_for_concurrent_read = min_marks_for_concurrent_read,
.use_uncompressed_cache = use_uncompressed_cache,
.reader_settings = reader_settings,
.backoff_settings = MergeTreeReadPool::BackoffSettings(settings),
};
auto read_type = input_order_info->direction == 1
? ReadFromMergeTree::ReadType::InOrder
: ReadFromMergeTree::ReadType::InReverseOrder;
auto plan = std::make_unique<QueryPlan>();
auto step = std::make_unique<ReadFromMergeTree>(
data, metadata_snapshot, query_id,
column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns,
step_settings, num_streams, read_type);
plan->addStep(std::move(step));
plans.emplace_back(std::move(plan));
}
@ -1371,6 +1354,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -1412,7 +1396,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
num_streams = settings.max_final_threads;
/// If setting do_not_merge_across_partitions_select_final is true then we won't merge parts from different partitions.
/// We have all parts in parts vector, where parts with same partition are nerby.
/// We have all parts in parts vector, where parts with same partition are nearby.
/// So we will store iterators pointed to the beginning of each partition range (and parts.end()),
/// then we will create a pipe for each partition that will run selecting processor and merging processor
/// for the parts with this partition. In the end we will unite all the pipes.
@ -1451,7 +1435,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
QueryPlanPtr plan;
{
Pipes pipes;
RangesInDataParts new_parts;
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
/// with level > 0 then we won't postprocess this part and if num_streams > 1 we
@ -1470,36 +1454,35 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
{
for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it)
{
auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
data,
metadata_snapshot,
part_it->data_part,
max_block_size,
settings.preferred_block_size_bytes,
settings.preferred_max_column_in_block_size_bytes,
column_names,
part_it->ranges,
use_uncompressed_cache,
query_info.prewhere_info,
true,
reader_settings,
virt_columns,
part_it->part_index_in_query);
pipes.emplace_back(std::move(source_processor));
new_parts.emplace_back(part_it->data_part, part_it->part_index_in_query, part_it->ranges);
}
}
if (pipes.empty())
if (new_parts.empty())
continue;
auto pipe = Pipe::unitePipes(std::move(pipes));
ReadFromMergeTree::Settings step_settings
{
.max_block_size = max_block_size,
.preferred_block_size_bytes = settings.preferred_block_size_bytes,
.preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes,
.min_marks_for_concurrent_read = 0, /// this setting is not used for reading in order
.use_uncompressed_cache = use_uncompressed_cache,
.reader_settings = reader_settings,
.backoff_settings = MergeTreeReadPool::BackoffSettings(settings),
};
plan = std::make_unique<QueryPlan>();
auto step = std::make_unique<ReadFromMergeTree>(
data, metadata_snapshot, query_id,
column_names, std::move(new_parts), std::move(index_stats), query_info.prewhere_info, virt_columns,
step_settings, num_streams, ReadFromMergeTree::ReadType::InOrder);
plan->addStep(std::move(step));
/// Drop temporary columns added by 'sorting_key_expr'
if (!out_projection)
out_projection = createProjection(pipe.getHeader());
plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
out_projection = createProjection(plan->getCurrentDataStream().header);
}
auto expression_step = std::make_unique<ExpressionStep>(
@ -1546,7 +1529,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
if (!lonely_parts.empty())
{
Pipes pipes;
RangesInDataParts new_parts;
size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size();
@ -1561,41 +1544,28 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal(
if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read && lonely_parts.size() < num_streams_for_lonely_parts)
num_streams_for_lonely_parts = std::max((sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, lonely_parts.size());
MergeTreeReadPoolPtr pool = std::make_shared<MergeTreeReadPool>(
num_streams_for_lonely_parts,
sum_marks_in_lonely_parts,
min_marks_for_concurrent_read,
std::move(lonely_parts),
data,
metadata_snapshot,
query_info.prewhere_info,
true,
column_names,
MergeTreeReadPool::BackoffSettings(settings),
settings.preferred_block_size_bytes,
false);
LOG_DEBUG(log, "Reading approx. {} rows with {} streams", total_rows_in_lonely_parts, num_streams_for_lonely_parts);
for (size_t i = 0; i < num_streams_for_lonely_parts; ++i)
ReadFromMergeTree::Settings step_settings
{
auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
i, pool, min_marks_for_concurrent_read, max_block_size,
settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
data, metadata_snapshot, use_uncompressed_cache,
query_info.prewhere_info, reader_settings, virt_columns);
.max_block_size = max_block_size,
.preferred_block_size_bytes = settings.preferred_block_size_bytes,
.preferred_max_column_in_block_size_bytes = settings.preferred_max_column_in_block_size_bytes,
.min_marks_for_concurrent_read = min_marks_for_concurrent_read,
.use_uncompressed_cache = use_uncompressed_cache,
.reader_settings = reader_settings,
.backoff_settings = MergeTreeReadPool::BackoffSettings(settings),
};
pipes.emplace_back(std::move(source));
}
auto plan = std::make_unique<QueryPlan>();
auto step = std::make_unique<ReadFromMergeTree>(
data, metadata_snapshot, query_id,
column_names, std::move(lonely_parts), std::move(index_stats), query_info.prewhere_info, virt_columns,
step_settings, num_streams_for_lonely_parts, ReadFromMergeTree::ReadType::Default);
auto pipe = Pipe::unitePipes(std::move(pipes));
plan->addStep(std::move(step));
/// Drop temporary columns added by 'sorting_key_expr'
if (!out_projection)
out_projection = createProjection(pipe.getHeader());
QueryPlanPtr plan = createPlanFromPipe(std::move(pipe), query_id, data, "with final");
out_projection = createProjection(plan->getCurrentDataStream().header);
auto expression_step = std::make_unique<ExpressionStep>(
plan->getCurrentDataStream(),
@ -1896,7 +1866,8 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
const std::optional<KeyCondition> & minmax_idx_condition,
const DataTypes & minmax_columns_types,
std::optional<PartitionPruner> & partition_pruner,
const PartitionIdToMaxBlock * max_block_numbers_to_read)
const PartitionIdToMaxBlock * max_block_numbers_to_read,
PartFilterCounters & counters)
{
auto prev_parts = parts;
parts.clear();
@ -1909,22 +1880,35 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
if (part->isEmpty())
continue;
if (max_block_numbers_to_read)
{
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
continue;
}
size_t num_granules = part->getMarksCount();
if (num_granules && part->index_granularity.hasFinalMark())
--num_granules;
counters.num_initial_selected_parts += 1;
counters.num_initial_selected_granules += num_granules;
if (minmax_idx_condition && !minmax_idx_condition->checkInHyperrectangle(
part->minmax_idx.hyperrectangle, minmax_columns_types).can_be_true)
continue;
counters.num_parts_after_minmax += 1;
counters.num_granules_after_minmax += num_granules;
if (partition_pruner)
{
if (partition_pruner->canBePruned(part))
continue;
}
if (max_block_numbers_to_read)
{
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second)
continue;
}
counters.num_parts_after_partition_pruner += 1;
counters.num_granules_after_partition_pruner += num_granules;
parts.push_back(part);
}
@ -1937,7 +1921,8 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
const DataTypes & minmax_columns_types,
std::optional<PartitionPruner> & partition_pruner,
const PartitionIdToMaxBlock * max_block_numbers_to_read,
ContextPtr query_context) const
ContextPtr query_context,
PartFilterCounters & counters) const
{
/// process_parts prepares the parts that have to be read for the query,
/// and returns false if duplicate part UUIDs have been encountered
@ -1957,17 +1942,6 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
if (part->isEmpty())
continue;
if (minmax_idx_condition
&& !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, minmax_columns_types)
.can_be_true)
continue;
if (partition_pruner)
{
if (partition_pruner->canBePruned(part))
continue;
}
if (max_block_numbers_to_read)
{
auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id);
@ -1975,13 +1949,37 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
continue;
}
/// Skip the part if its uuid is meant to be excluded
if (part->uuid != UUIDHelpers::Nil && ignored_part_uuids->has(part->uuid))
continue;
size_t num_granules = part->getMarksCount();
if (num_granules && part->index_granularity.hasFinalMark())
--num_granules;
counters.num_initial_selected_parts += 1;
counters.num_initial_selected_granules += num_granules;
if (minmax_idx_condition
&& !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx.hyperrectangle, minmax_columns_types)
.can_be_true)
continue;
counters.num_parts_after_minmax += 1;
counters.num_granules_after_minmax += num_granules;
if (partition_pruner)
{
if (partition_pruner->canBePruned(part))
continue;
}
counters.num_parts_after_partition_pruner += 1;
counters.num_granules_after_partition_pruner += num_granules;
/// populate UUIDs and exclude ignored parts if enabled
if (part->uuid != UUIDHelpers::Nil)
{
/// Skip the part if its uuid is meant to be excluded
if (ignored_part_uuids->has(part->uuid))
continue;
auto result = temp_part_uuids.insert(part->uuid);
if (!result.second)
throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR);
@ -2013,6 +2011,8 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter(
{
LOG_DEBUG(log, "Found duplicate uuids locally, will retry part selection without them");
counters = PartFilterCounters();
/// Second attempt didn't help, throw an exception
if (!select_parts(parts))
throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS);

View File

@ -5,6 +5,7 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/PartitionPruner.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
namespace DB
@ -57,6 +58,7 @@ private:
QueryPlanPtr spreadMarkRangesAmongStreams(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -71,6 +73,7 @@ private:
/// out_projection - save projection only with columns, requested to read
QueryPlanPtr spreadMarkRangesAmongStreamsWithOrder(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -86,6 +89,7 @@ private:
QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
RangesInDataParts && parts,
ReadFromMergeTree::IndexStatPtr index_stats,
size_t num_streams,
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
@ -123,6 +127,16 @@ private:
size_t & granules_dropped,
Poco::Logger * log);
struct PartFilterCounters
{
size_t num_initial_selected_parts = 0;
size_t num_initial_selected_granules = 0;
size_t num_parts_after_minmax = 0;
size_t num_granules_after_minmax = 0;
size_t num_parts_after_partition_pruner = 0;
size_t num_granules_after_partition_pruner = 0;
};
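/// A minimal sketch (an illustration, not part of this diff) of how these counters can be
/// turned into the ReadFromMergeTree::IndexStat entries reported for the query plan,
/// mirroring the emplace_back call in MergeTreeDataSelectExecutor.cpp; the MinMax and
/// Partition index types are assumed here alongside the Skip type shown above:
///
///     index_stats->emplace_back(ReadFromMergeTree::IndexStat{
///         .type = ReadFromMergeTree::IndexType::MinMax,
///         .num_parts_after = counters.num_parts_after_minmax,
///         .num_granules_after = counters.num_granules_after_minmax});
///     index_stats->emplace_back(ReadFromMergeTree::IndexStat{
///         .type = ReadFromMergeTree::IndexType::Partition,
///         .num_parts_after = counters.num_parts_after_partition_pruner,
///         .num_granules_after = counters.num_granules_after_partition_pruner});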
/// Select the parts in which there can be data that satisfy `minmax_idx_condition` and that match the condition on `_part`,
/// as well as `max_block_number_to_read`.
static void selectPartsToRead(
@ -131,7 +145,8 @@ private:
const std::optional<KeyCondition> & minmax_idx_condition,
const DataTypes & minmax_columns_types,
std::optional<PartitionPruner> & partition_pruner,
const PartitionIdToMaxBlock * max_block_numbers_to_read);
const PartitionIdToMaxBlock * max_block_numbers_to_read,
PartFilterCounters & counters);
/// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
void selectPartsToReadWithUUIDFilter(
@ -141,7 +156,8 @@ private:
const DataTypes & minmax_columns_types,
std::optional<PartitionPruner> & partition_pruner,
const PartitionIdToMaxBlock * max_block_numbers_to_read,
ContextPtr query_context) const;
ContextPtr query_context,
PartFilterCounters & counters) const;
};
}

View File

@ -100,7 +100,7 @@ private:
const MergeTreeData & data;
StorageMetadataPtr metadata_snapshot;
Names column_names;
const Names column_names;
bool do_not_steal_tasks;
bool predict_block_size_bytes;
std::vector<NameSet> per_part_column_name_set;

View File

@ -32,6 +32,8 @@ public:
bool canBePruned(const DataPartPtr & part);
bool isUseless() const { return useless; }
const KeyCondition & getKeyCondition() const { return partition_condition; }
};
}
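/// A minimal usage sketch (an illustration, not part of this diff) of how the pruner
/// cooperates with the new PartFilterCounters in MergeTreeDataSelectExecutor::selectPartsToRead;
/// the granule bookkeeping is simplified here (the real loop also skips the final mark):
///
///     for (const auto & part : prev_parts)
///     {
///         if (partition_pruner && partition_pruner->canBePruned(part))
///             continue;  /// dropped by the partition predicate
///         counters.num_parts_after_partition_pruner += 1;
///         counters.num_granules_after_partition_pruner += part->getMarksCount();
///         parts.push_back(part);
///     }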

Some files were not shown because too many files have changed in this diff.