diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh
index 15590902b68..0c11e0a615d 100755
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# shellcheck disable=SC2086,SC2001
+# shellcheck disable=SC2086,SC2001,SC2046
set -eux
set -o pipefail
@@ -13,24 +13,48 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
+BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function clone
{
- # The download() function is dependent on CI binaries anyway, so we can take
- # the repo from the CI as well. For local runs, start directly from the "fuzz"
- # stage.
- rm -rf ch ||:
- mkdir ch ||:
- wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
- tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
+ # For local runs, start directly from the "fuzz" stage.
+ rm -rf "$repo_dir" ||:
+ mkdir "$repo_dir" ||:
+
+ git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$repo_dir" 2>&1 | ts '%Y-%m-%d %H:%M:%S'
+ (
+ cd "$repo_dir"
+ if [ "$PR_TO_TEST" != "0" ]; then
+ if git fetch --depth 1 origin "+refs/pull/$PR_TO_TEST/merge"; then
+ git checkout FETCH_HEAD
+ echo "Checked out pull/$PR_TO_TEST/merge ($(git rev-parse FETCH_HEAD))"
+ else
+ git fetch --depth 1 origin "+refs/pull/$PR_TO_TEST/head"
+ git checkout "$SHA_TO_TEST"
+ echo "Checked out nominal SHA $SHA_TO_TEST for PR $PR_TO_TEST"
+ fi
+ git diff --name-only master HEAD | tee ci-changed-files.txt
+ else
+ if [ -v COMMIT_SHA ]; then
+ git fetch --depth 2 origin "$SHA_TO_TEST"
+ git checkout "$SHA_TO_TEST"
+ echo "Checked out nominal SHA $SHA_TO_TEST for master"
+ else
+ git fetch --depth 2 origin
+ echo "Using default repository head $(git rev-parse HEAD)"
+ fi
+ git diff --name-only HEAD~1 HEAD | tee ci-changed-files.txt
+ fi
+ cd -
+ )
+
ls -lath ||:
+
}
function download
{
- wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse" &
- wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/ci-changed-files.txt" &
- wait
+ wget -nv -nd -c "$BINARY_URL_TO_DOWNLOAD"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
@@ -113,7 +137,7 @@ function fuzz
# Obtain the list of newly added tests. They will be fuzzed in more extreme way than other tests.
# Don't overwrite the NEW_TESTS_OPT so that it can be set from the environment.
- NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\(\.j2\)\?\)$!ch/\1!p' ci-changed-files.txt | sort -R)"
+ NEW_TESTS="$(sed -n 's!\(^tests/queries/0_stateless/.*\.sql\(\.j2\)\?\)$!ch/\1!p' $repo_dir/ci-changed-files.txt | sort -R)"
# ci-changed-files.txt contains also files that has been deleted/renamed, filter them out.
NEW_TESTS="$(filter_exists_and_template $NEW_TESTS)"
if [[ -n "$NEW_TESTS" ]]
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index 88b66d42ecb..73d9454ab7f 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -33,7 +33,7 @@ RUN apt-get update \
tzdata \
vim \
wget \
- && pip3 --no-cache-dir install 'git+https://github.com/mymarilyn/clickhouse-driver.git' scipy \
+ && pip3 --no-cache-dir install 'clickhouse-driver==0.2.1' scipy \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \
&& apt-get clean \
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index c97e8a6ed2b..38595d47528 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -196,7 +196,6 @@ function run_tests
test_files=$(ls "$test_prefix" | grep "$CHPC_TEST_GREP" | xargs -I{} -n1 readlink -f "$test_prefix/{}")
elif [ "$PR_TO_TEST" -ne 0 ] \
&& [ "$(wc -l < changed-test-definitions.txt)" -gt 0 ] \
- && [ "$(wc -l < changed-test-scripts.txt)" -eq 0 ] \
&& [ "$(wc -l < other-changed-files.txt)" -eq 0 ]
then
# If only the perf tests were changed in the PR, we will run only these
@@ -208,15 +207,15 @@ function run_tests
test_files=$(ls "$test_prefix"/*.xml)
fi
- # For PRs w/o changes in test definitons and scripts, test only a subset of
- # queries, and run them less times. If the corresponding environment variables
- # are already set, keep those values.
- if [ "$PR_TO_TEST" -ne 0 ] \
- && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ] \
- && [ "$(wc -l < changed-test-scripts.txt)" -eq 0 ]
+ # For PRs w/o changes in test definitions, test only a subset of queries,
+ # and run them fewer times. If the corresponding environment variables are
+ # already set, keep those values.
+ #
+ # NOTE: too high CHPC_RUNS/CHPC_MAX_QUERIES may hit internal CI timeout.
+ if [ "$PR_TO_TEST" -ne 0 ] && [ "$(wc -l < changed-test-definitions.txt)" -eq 0 ]
then
CHPC_RUNS=${CHPC_RUNS:-7}
- CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-20}
+ CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-10}
else
CHPC_RUNS=${CHPC_RUNS:-13}
CHPC_MAX_QUERIES=${CHPC_MAX_QUERIES:-0}
@@ -319,14 +318,14 @@ function get_profiles
wait
- clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
+ clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
- clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
+ clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
index a9ae31bf38c..093834943a3 100644
--- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
+++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
@@ -24,6 +24,13 @@
60
+
+
+ 0
+ 0
+
+ 0
diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh
index 1295e5567fb..d87b95b1129 100755
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@@ -102,7 +102,6 @@ then
base=$(git -C right/ch merge-base pr origin/master)
git -C right/ch diff --name-only "$base" pr -- . | tee all-changed-files.txt
git -C right/ch diff --name-only "$base" pr -- tests/performance | tee changed-test-definitions.txt
- git -C right/ch diff --name-only "$base" pr -- docker/test/performance-comparison | tee changed-test-scripts.txt
git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt
fi
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index a6e7e397e32..301c5cc7d73 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -283,8 +283,11 @@ for query_index in queries_to_run:
# test coverage. We disable profiler for normal runs because
# it makes the results unstable.
res = c.execute(q, query_id = prewarm_id,
- settings = {'max_execution_time': args.max_query_seconds,
- 'query_profiler_real_time_period_ns': 10000000})
+ settings = {
+ 'max_execution_time': args.max_query_seconds,
+ 'query_profiler_real_time_period_ns': 10000000,
+ 'memory_profiler_step': '4Mi',
+ })
except clickhouse_driver.errors.Error as e:
# Add query id to the exception to make debugging easier.
e.args = (prewarm_id, *e.args)
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 6dd53fffb86..f04d4102138 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -170,5 +170,7 @@ toc_title: Adopters
| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
+| ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
+| DeepL | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/)
diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md
index 2880793962a..a220575cb3c 100644
--- a/docs/en/operations/performance-test.md
+++ b/docs/en/operations/performance-test.md
@@ -23,7 +23,7 @@ chmod a+x ./hardware.sh
./hardware.sh
```
-3. Copy the output and send it to clickhouse-feedback@yandex-team.com
+3. Copy the output and send it to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index cdf49678570..8bb50144180 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -69,6 +69,8 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
```
+
+
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 8eb6101a605..ff47aa96502 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1751,9 +1751,11 @@ Do not merge aggregation states from different servers for distributed query pro
Possible values:
-- 0 — Disabled (final query processing is done on the initiator node).
-- 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
-- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
+- `0` — Disabled (final query processing is done on the initiator node).
+- `1` - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data); can be used when it is certain that there are different keys on different shards.
+- `2` - Same as `1`, but applies `ORDER BY` and `LIMIT` on the initiator (this is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`); can be used for queries with `ORDER BY` and/or `LIMIT`.
+
+Default value: `0`
**Example**
@@ -1784,29 +1786,27 @@ FORMAT PrettyCompactMonoBlock
└───────┘
```
-Default value: 0
+## distributed_push_down_limit {#distributed-push-down-limit}
-## distributed_push_down_limit (#distributed-push-down-limit}
-
-LIMIT will be applied on each shard separatelly.
+Enables or disables applying [LIMIT](#limit) on each shard separately.
This will allow to avoid:
+- Sending extra rows over the network;
+- Processing rows behind the limit on the initiator.
-- sending extra rows over network,
-- processing rows behind the limit on the initiator.
-
-It is possible if at least one of the following conditions met:
-
-- `distributed_group_by_no_merge` > 0
-- query **does not have `GROUP BY`/`DISTINCT`/`LIMIT BY`**, but it has `ORDER BY`/`LIMIT`.
-- query **has `GROUP BY`/`DISTINCT`/`LIMIT BY`** with `ORDER BY`/`LIMIT` and:
- - `optimize_skip_unused_shards_limit` is enabled
- - `optimize_distributed_group_by_sharding_key` is enabled
+Starting from version 21.9 you cannot get inaccurate results anymore, since `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met:
+- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0.
+- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
+- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
+ - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
+ - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.
Possible values:
-- 0 - Disabled
-- 1 - Enabled
+- 0 — Disabled.
+- 1 — Enabled.
+
+Default value: `1`.
See also:
@@ -1920,6 +1920,7 @@ Default value: 0
See also:
- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
+- [distributed_push_down_limit](#distributed-push-down-limit)
- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
!!! note "Note"
@@ -3831,6 +3832,21 @@ Default value: `0`.
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
+## describe_include_subcolumns {#describe_include_subcolumns}
+
+Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or an [Array](../../sql-reference/data-types/array.md#array-size) data type.
+
+Possible values:
+
+- 0 — Subcolumns are not included in `DESCRIBE` queries.
+- 1 — Subcolumns are included in `DESCRIBE` queries.
+
+Default value: `0`.
+
+**Example**
+
+See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement.
+
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts.
diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md
index d669b1c8b32..f8736bcc61a 100644
--- a/docs/en/sql-reference/functions/geo/s2.md
+++ b/docs/en/sql-reference/functions/geo/s2.md
@@ -2,13 +2,13 @@
toc_title: S2 Geometry
---
-# Functions for Working with S2 Index {#s2Index}
+# Functions for Working with S2 Index {#s2index}
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
-In the S2 library points are represented as unit length vectors called S2 point indices (points on the surface of a three dimensional unit sphere) as opposed to traditional (latitude, longitude) pairs.
+In the S2 library, points are represented as the S2 Index, a specific number that internally encodes a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude), use the [geoToS2](#geotos2) function. You can also use the [s2ToGeo](#s2togeo) function to get the geographical coordinates corresponding to the specified S2 point index.
-## geoToS2 {#geoToS2}
+## geoToS2 {#geotos2}
Returns [S2](#s2index) point index corresponding to the provided coordinates `(longitude, latitude)`.
@@ -34,7 +34,7 @@ Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
-SELECT geoToS2(37.79506683, 55.71290588) as s2Index;
+SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
```
Result:
@@ -45,7 +45,7 @@ Result:
└─────────────────────┘
```
-## s2ToGeo {#s2ToGeo}
+## s2ToGeo {#s2togeo}
Returns geo coordinates `(longitude, latitude)` corresponding to the provided [S2](#s2index) point index.
@@ -57,20 +57,20 @@ s2ToGeo(s2index)
**Arguments**
-- `s2Index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- A tuple consisting of two values: `tuple(lon,lat)`.
-Type: `lon` - [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
+Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
-SELECT s2ToGeo(4704772434919038107) as s2Coodrinates;
+SELECT s2ToGeo(4704772434919038107) AS s2Coodrinates;
```
Result:
@@ -81,9 +81,9 @@ Result:
└──────────────────────────────────────┘
```
-## s2GetNeighbors {#s2GetNeighbors}
+## s2GetNeighbors {#s2getneighbors}
-Returns S2 neighbor indices corresponding to the provided [S2](#s2index)). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
+Returns S2 neighbor indexes corresponding to the provided [S2](#s2index) index. Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
**Syntax**
@@ -97,16 +97,16 @@ s2GetNeighbors(s2index)
**Returned values**
-- An array consisting of the 4 neighbor indices: `array[s2index1, s2index3, s2index2, s2index4]`.
+- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`.
-Type: Each S2 index is [UInt64](../../../sql-reference/data-types/int-uint.md).
+Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
- select s2GetNeighbors(5074766849661468672) AS s2Neighbors;
+SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
```
Result:
@@ -117,9 +117,9 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────┘
```
-## s2CellsIntersect {#s2CellsIntersect}
+## s2CellsIntersect {#s2cellsintersect}
-Determines if the two provided [S2](#s2index)) cell indices intersect or not.
+Determines if the two provided [S2](#s2index) cells intersect or not.
**Syntax**
@@ -133,8 +133,8 @@ s2CellsIntersect(s2index1, s2index2)
**Returned values**
-- 1 — If the S2 cell indices intersect.
-- 0 — If the S2 cell indices don't intersect.
+- 1 — If the cells intersect.
+- 0 — If the cells don't intersect.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
@@ -143,7 +143,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
- select s2CellsIntersect(9926595209846587392, 9926594385212866560) as intersect;
+SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
```
Result:
@@ -154,11 +154,9 @@ Result:
└───────────┘
```
-## s2CapContains {#s2CapContains}
+## s2CapContains {#s2capcontains}
-A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
-
-Determines if a cap contains a s2 point index.
+Determines if a cap contains an S2 point. A cap represents a part of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@@ -168,9 +166,9 @@ s2CapContains(center, degrees, point)
**Arguments**
-- `center` - S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- - `degrees` - Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- - `point` - S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
+- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@@ -184,7 +182,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
-select s2CapContains(1157339245694594829, 1.0, 1157347770437378819) as capContains;
+SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
```
Result:
@@ -195,11 +193,9 @@ Result:
└─────────────┘
```
-## s2CapUnion {#s2CapUnion}
+## s2CapUnion {#s2capunion}
-A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
-
-Determines the smallest cap that contains the given two input caps.
+Determines the smallest cap that contains the given two input caps. A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@@ -209,13 +205,13 @@ s2CapUnion(center1, radius1, center2, radius2)
**Arguments**
-- `center1`, `center2` - S2 point indices corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- - `radius1`, `radius2` - Radii of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
+- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `radius1`, `radius2` — Radii of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
-- `center` - S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- - `radius` - Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
+- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
@@ -233,11 +229,9 @@ Result:
└────────────────────────────────────────┘
```
-## s2RectAdd{#s2RectAdd}
+## s2RectAdd {#s2rectadd}
-In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
-
-Increases the size of the bounding rectangle to include the given S2 point index.
+Increases the size of the bounding rectangle to include the given S2 point. In the S2 system, a rectangle is represented by a type of S2Region called `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@@ -247,21 +241,21 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
**Arguments**
-- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2Point` - Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
-- `s2PointLow` - Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- - `s2PointHigh` - Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
+- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
-SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) as rectAdd;
+SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
```
Result:
@@ -272,11 +266,9 @@ Result:
└───────────────────────────────────────────┘
```
-## s2RectContains{#s2RectContains}
+## s2RectContains {#s2rectcontains}
-In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
-
-Determines if a given rectangle contains a S2 point index.
+Determines if a given rectangle contains an S2 point. In the S2 system, a rectangle is represented by a type of S2Region called `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@@ -286,9 +278,9 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
**Arguments**
-- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2Point` - Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@@ -300,7 +292,7 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
Query:
``` sql
-SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains
+SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
```
Result:
@@ -311,11 +303,9 @@ Result:
└──────────────┘
```
-## s2RectUinion{#s2RectUnion}
+## s2RectUnion {#s2rectunion}
-In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
-
-Returns the smallest rectangle containing the union of this rectangle and the given rectangle.
+Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@@ -325,20 +315,20 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
**Arguments**
-- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
-- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2UnionRect2PointHi` - High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
-SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion
+SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
```
Result:
@@ -349,9 +339,9 @@ Result:
└───────────────────────────────────────────┘
```
-## s2RectIntersection{#s2RectIntersection}
+## s2RectIntersection {#s2rectintersection}
-Returns the smallest Rectangle containing the intersection of this rectangle and the given rectangle.
+Returns the smallest rectangle containing the intersection of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@@ -361,20 +351,20 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin
**Arguments**
-- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
-- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
-- `s2UnionRect2PointHi` - Hi S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
-SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection
+SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
```
Result:
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 46ce350377c..843cb16f572 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -22,7 +22,7 @@ map(key1, value1[, key2, value2, ...])
**Returned value**
-- Data structure as `key:value` pairs.
+- Data structure as `key:value` pairs.
Type: [Map(key, value)](../../sql-reference/data-types/map.md).
@@ -165,9 +165,6 @@ Result:
## mapPopulateSeries {#function-mappopulateseries}
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
-Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
-
-For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Syntax**
@@ -178,12 +175,17 @@ mapPopulateSeries(map[, max])
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from minimum to maximum keys (or `max` argument if it specified) taken from the map with a step size of one, and corresponding values. If the value is not specified for the key, then it uses the default value in the resulting map. For repeated keys, only the first value (in order of appearing) gets associated with the key.
+For array arguments the number of elements in `keys` and `values` must be the same for each row.
+
**Arguments**
+Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
+
Mapped arrays:
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
+- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
or
@@ -191,14 +193,14 @@ or
**Returned value**
-- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
+- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and the values corresponding to the keys.
**Example**
Query with mapped arrays:
```sql
-select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
+SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Result:
diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md
index bc197bf0f72..823a31ed313 100644
--- a/docs/en/sql-reference/statements/describe-table.md
+++ b/docs/en/sql-reference/statements/describe-table.md
@@ -3,18 +3,67 @@ toc_priority: 42
toc_title: DESCRIBE
---
-# DESCRIBE TABLE Statement {#misc-describe-table}
+# DESCRIBE TABLE {#misc-describe-table}
+
+Returns information about table columns.
+
+**Syntax**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
-Returns the following `String` type columns:
+The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values:
-- `name` — Column name.
-- `type`— Column type.
-- `default_type` — Clause that is used in [default expression](../../sql-reference/statements/create/table.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). Column contains an empty string, if the default expression isn’t specified.
-- `default_expression` — Value specified in the `DEFAULT` clause.
-- `comment_expression` — Comment text.
+- `name` — A column name.
+- `type` — A column type.
+- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then an empty string is returned.
+- `default_expression` — An expression specified after the `DEFAULT` clause.
+- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column).
+- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column.
+- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
+- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included in the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
-Nested data structures are output in “expanded” format. Each column is shown separately, with the name after a dot.
+All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
+
+To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
+
+**Example**
+
+Query:
+
+``` sql
+CREATE TABLE describe_example (
+ id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
+ user Tuple (name String, age UInt8)
+) ENGINE = MergeTree() ORDER BY id;
+
+DESCRIBE TABLE describe_example;
+DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
+```
+
+Result:
+
+``` text
+┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ id │ UInt64 │ │ │ │ │ │
+│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
+│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
+└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+The second query additionally shows subcolumns:
+
+``` text
+┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
+│ id │ UInt64 │ │ │ │ │ │ 0 │
+│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
+│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
+│ user.name │ String │ │ │ │ │ │ 1 │
+│ user.age │ UInt8 │ │ │ │ │ │ 1 │
+└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
+```
+
+**See Also**
+
+- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 6d5bcda8452..e5efa657620 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1705,6 +1705,32 @@ ClickHouse генерирует исключение
Значение по умолчанию: 0.
+## distributed_push_down_limit {#distributed-push-down-limit}
+
+Включает или отключает [LIMIT](#limit), применяемый к каждому шарду по отдельности.
+
+Это позволяет избежать:
+- отправки дополнительных строк по сети;
+- обработки строк за пределами ограничения для инициатора.
+
+Начиная с версии 21.9 вы больше не сможете получить неточные результаты, так как `distributed_push_down_limit` изменяет выполнение запроса только в том случае, если выполнено хотя бы одно из условий:
+- `distributed_group_by_no_merge` > 0.
+- запрос **не содержит** `GROUP BY`/`DISTINCT`/`LIMIT BY`, но содержит `ORDER BY`/`LIMIT`.
+- запрос **содержит** `GROUP BY`/`DISTINCT`/`LIMIT BY` с `ORDER BY`/`LIMIT` и:
+ - включена настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards).
+ - включена настройка `optimize_distributed_group_by_sharding_key`.
+
+Возможные значения:
+
+- 0 — выключена.
+- 1 — включена.
+
+Значение по умолчанию: `1`.
+
+См. также:
+
+- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
+
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md) , в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае запрос выдаст неверный результат.
@@ -3641,6 +3667,21 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- настройка [optimize_move_to_prewhere](#optimize_move_to_prewhere)
+## describe_include_subcolumns {#describe_include_subcolumns}
+
+Включает или отключает описание подстолбцов при выполнении запроса [DESCRIBE](../../sql-reference/statements/describe-table.md). Настройка действует, например, на элементы [Tuple](../../sql-reference/data-types/tuple.md) или подстолбцы типов [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) или [Array](../../sql-reference/data-types/array.md#array-size).
+
+Возможные значения:
+
+- 0 — подстолбцы не включаются в результат запросов `DESCRIBE`.
+- 1 — подстолбцы включаются в результат запросов `DESCRIBE`.
+
+Значение по умолчанию: `0`.
+
+**Пример**
+
+Смотрите пример запроса [DESCRIBE](../../sql-reference/statements/describe-table.md).
+
## async_insert {#async-insert}
Включает или отключает асинхронные вставки. Работает только для вставок по протоколу HTTP. Обратите внимание, что при таких вставках дедупликация не производится.
diff --git a/docs/ru/sql-reference/functions/geo/s2.md b/docs/ru/sql-reference/functions/geo/s2.md
new file mode 100644
index 00000000000..6b801e1d08f
--- /dev/null
+++ b/docs/ru/sql-reference/functions/geo/s2.md
@@ -0,0 +1,376 @@
+---
+toc_title: "Функции для работы с индексами S2"
+---
+
+# Функции для работы с индексами S2 {#s2index}
+
+[S2](https://s2geometry.io/) — это система геокодирования, в которой все географические данные представлены на трехмерной сфере (аналогично глобусу).
+
+В библиотеке S2 точки представлены в виде индекса S2 — определенного числа, которое внутренне кодирует точку на поверхности трехмерной единичной сферы, в отличие от традиционных пар (широта, долгота). Чтобы получить индекс S2 для точки, заданной в формате (широта, долгота), используйте функцию [geoToS2](#geotos2). Также вы можете использовать функцию [s2ToGeo](#s2togeo) для получения географических координат, соответствующих заданному S2 индексу точки.
+
+## geoToS2 {#geotos2}
+
+Возвращает [S2](#s2index) индекс точки, соответствующий заданным координатам в формате `(долгота, широта)`.
+
+**Синтаксис**
+
+``` sql
+geoToS2(lon, lat)
+```
+
+**Аргументы**
+
+- `lon` — долгота. [Float64](../../../sql-reference/data-types/float.md).
+- `lat` — широта. [Float64](../../../sql-reference/data-types/float.md).
+
+**Возвращаемое значение**
+
+- S2 индекс точки.
+
+Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
+```
+
+Результат:
+
+``` text
+┌─────────────s2Index─┐
+│ 4704772434919038107 │
+└─────────────────────┘
+```
+
+## s2ToGeo {#s2togeo}
+
+Возвращает географические координаты `(долгота, широта)`, соответствующие заданному [S2](#s2index) индексу точки.
+
+**Синтаксис**
+
+``` sql
+s2ToGeo(s2index)
+```
+
+**Аргументы**
+
+- `s2index` — [S2](#s2index) индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- Кортеж из двух значений: `tuple(lon,lat)`.
+
+Тип: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2ToGeo(4704772434919038107) AS s2Coodrinates;
+```
+
+Результат:
+
+``` text
+┌─s2Coodrinates────────────────────────┐
+│ (37.79506681471008,55.7129059052841) │
+└──────────────────────────────────────┘
+```
+
+## s2GetNeighbors {#s2getneighbors}
+
+Возвращает [S2](#s2index) индексы ячеек, которые являются соседними для заданного S2 индекса. Ячейка в системе S2 представляет собой четырехугольник, ограниченный четырьмя геодезическими линиями. Соответственно, у каждой ячейки есть 4 соседние ячейки.
+
+**Синтаксис**
+
+``` sql
+s2GetNeighbors(s2index)
+```
+
+**Аргументы**
+
+- `s2index` — [S2](#s2index) индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- Массив, содержащий 4 значения — S2 индекса соседних ячеек: `array[s2index1, s2index3, s2index2, s2index4]`.
+
+Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
+```
+
+Результат:
+
+``` text
+┌─s2Neighbors───────────────────────────────────────────────────────────────────────┐
+│ [5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] │
+└───────────────────────────────────────────────────────────────────────────────────┘
+```
+
+## s2CellsIntersect {#s2cellsintersect}
+
+Проверяет, пересекаются ли две заданные ячейки или нет.
+
+**Синтаксис**
+
+``` sql
+s2CellsIntersect(s2index1, s2index2)
+```
+
+**Аргументы**
+
+- `s2index1`, `s2index2` — S2 индексы первой и второй ячейки. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- 1 — ячейки пересекаются.
+- 0 — ячейки не пересекаются.
+
+Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
+```
+
+Результат:
+
+``` text
+┌─intersect─┐
+│ 1 │
+└───────────┘
+```
+
+## s2CapContains {#s2capcontains}
+
+Определяет, содержит ли заданный купол указанную точку. Купол представляет собой часть сферы, которая была отрезана плоскостью. Купол задается точкой на сфере и радиусом в градусах.
+
+**Синтаксис**
+
+``` sql
+s2CapContains(center, degrees, point)
+```
+
+**Аргументы**
+
+- `center` — S2 индекс точки, определяющей центр купола. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `degrees` — радиус купола в градусах. [Float64](../../../sql-reference/data-types/float.md).
+- `point` — S2 индекс проверяемой точки. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- 1 — купол содержит точку.
+- 0 — купол не содержит точку.
+
+Тип: [UInt8](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
+```
+
+Результат:
+
+``` text
+┌─capContains─┐
+│ 1 │
+└─────────────┘
+```
+
+## s2CapUnion {#s2capunion}
+
+Определяет наименьший купол, содержащий два заданных купола. Купол представляет собой часть сферы, которая была отрезана плоскостью. Купол задается точкой на сфере и радиусом в градусах.
+
+**Синтаксис**
+
+``` sql
+s2CapUnion(center1, radius1, center2, radius2)
+```
+
+**Аргументы**
+
+- `center1`, `center2` — S2 индексы точек, определяющие два центра куполов. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `radius1`, `radius2` — значения радиусов в градусах, определяющие два радиуса куполов. [Float64](../../../sql-reference/data-types/float.md).
+
+**Возвращаемые значения**
+
+- `center` — S2 индекс точки, соответствующий центру наименьшего купола, содержащего заданные купола. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `radius` — радиус в градусах наименьшего купола, содержащего заданные купола. Тип: [Float64](../../../sql-reference/data-types/float.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2CapUnion(3814912406305146967, 1.0, 1157347770437378819, 1.0) AS capUnion;
+```
+
+Результат:
+
+``` text
+┌─capUnion───────────────────────────────┐
+│ (4534655147792050737,60.2088283994957) │
+└────────────────────────────────────────┘
+```
+
+## s2RectAdd {#s2rectadd}
+
+Увеличивает размер ограничивающего прямоугольника, чтобы включить в себя точку, заданную S2 индексом. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
+
+**Синтаксис**
+
+``` sql
+s2RectAdd(s2pointLow, s2pointHigh, s2Point)
+```
+
+**Аргументы**
+
+- `s2PointLow` — S2 индекс нижней точки, которая задает ограничивающий прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — S2 индекс верхней точки, которая задает ограничивающий прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Point` — S2 индекс целевой точки, которая будет содержаться увеличенным ограничивающим прямоугольником. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- `s2PointLow` — идентификатор нижней S2 ячейки, соответствующий увеличенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — идентификатор верхней S2 ячейки, соответствующий увеличенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
+```
+
+Результат:
+
+``` text
+┌─rectAdd───────────────────────────────────┐
+│ (5179062030687166815,5177056748191934217) │
+└───────────────────────────────────────────┘
+```
+
+## s2RectContains {#s2rectcontains}
+
+Проверяет, содержит ли заданный прямоугольник указанную S2 точку. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
+
+**Синтаксис**
+
+``` sql
+s2RectContains(s2PointLow, s2PointHi, s2Point)
+```
+
+**Аргументы**
+
+- `s2PointLow` — S2 индекс самой низкой точки, которая задает прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2PointHigh` — S2 индекс самой высокой точки, которая задает прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Point` — S2 индекс проверяемой точки. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- 1 — прямоугольник содержит заданную точку.
+- 0 — прямоугольник не содержит заданную точку.
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
+```
+
+Результат:
+
+``` text
+┌─rectContains─┐
+│ 0 │
+└──────────────┘
+```
+
+## s2RectUnion {#s2rectunion}
+
+Возвращает наименьший прямоугольник, содержащий объединение двух заданных прямоугольников. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
+
+**Синтаксис**
+
+``` sql
+s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
+```
+
+**Аргументы**
+
+- `s2Rect1PointLow`, `s2Rect1PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают первый прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect2PointLow`, `s2Rect2PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают второй прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- `s2UnionRect2PointLow` — идентификатор нижней ячейки, соответствующей объединенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — идентификатор верхней ячейки, соответствующей объединенному прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
+```
+
+Результат:
+
+``` text
+┌─rectUnion─────────────────────────────────┐
+│ (5179062030687166815,5177056748191934217) │
+└───────────────────────────────────────────┘
+```
+
+## s2RectIntersection {#s2rectintersection}
+
+Возвращает наименьший прямоугольник, содержащий пересечение двух заданных прямоугольников. В системе S2 прямоугольник представлен типом S2Region, называемым `S2LatLngRect`, который задает прямоугольник в пространстве широта-долгота.
+
+**Синтаксис**
+
+``` sql
+s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
+```
+
+**Аргументы**
+
+- `s2Rect1PointLow`, `s2Rect1PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают первый прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2Rect2PointLow`, `s2Rect2PointHi` — значения S2 индекса для самой низкой и самой высокой точек, которые задают второй прямоугольник. [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+- `s2UnionRect2PointLow` — идентификатор нижней ячейки, соответствующей результирующему прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+- `s2UnionRect2PointHi` — идентификатор верхней ячейки, соответствующей результирующему прямоугольнику. Тип: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
+```
+
+Результат:
+
+``` text
+┌─rectIntersection──────────────────────────┐
+│ (5178914411069187297,5177056748191934217) │
+└───────────────────────────────────────────┘
+```
diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md
index e4cc1fefab4..45a5018500f 100644
--- a/docs/ru/sql-reference/functions/tuple-map-functions.md
+++ b/docs/ru/sql-reference/functions/tuple-map-functions.md
@@ -108,7 +108,7 @@ SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTy
SELECT mapAdd(map(1,1), map(1,1));
```
-Result:
+Результат:
```text
┌─mapAdd(map(1, 1), map(1, 1))─┐
@@ -128,13 +128,13 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
**Аргументы**
-Аргументами являются [кортежи](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array), где элементы в первом массиве представляют ключи, а второй массив содержит значения для каждого ключа.
+Аргументами являются контейнеры [Map](../../sql-reference/data-types/map.md) или [кортежи](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array), где элементы в первом массиве представляют ключи, а второй массив содержит значения для каждого ключа.
Все массивы ключей должны иметь один и тот же тип, а все массивы значений должны содержать элементы, которые можно приводить к одному типу ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) или [Float64](../../sql-reference/data-types/float.md#float32-float64)).
Общий приведенный тип используется в качестве типа для результирующего массива.
**Возвращаемое значение**
-- Возвращает один [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), в котором первый массив содержит отсортированные ключи, а второй - значения.
+- В зависимости от аргумента возвращает один [Map](../../sql-reference/data-types/map.md) или [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2), в котором первый массив содержит отсортированные ключи, а второй — значения.
**Пример**
@@ -152,6 +152,20 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
└────────────────┴───────────────────────────────────┘
```
+Запрос с контейнером `Map`:
+
+```sql
+SELECT mapSubtract(map(1,1), map(1,1));
+```
+
+Результат:
+
+```text
+┌─mapSubtract(map(1, 1), map(1, 1))─┐
+│ {1:0} │
+└───────────────────────────────────┘
+```
+
## mapPopulateSeries {#function-mappopulateseries}
Заполняет недостающие ключи в контейнере map (пара массивов ключей и значений), где ключи являются целыми числами. Кроме того, он поддерживает указание максимального ключа, который используется для расширения массива ключей.
@@ -160,6 +174,7 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
``` sql
mapPopulateSeries(keys, values[, max])
+mapPopulateSeries(map[, max])
```
Генерирует контейнер map, где ключи - это серия чисел, от минимального до максимального ключа (или аргумент `max`, если он указан), взятых из массива `keys` с размером шага один, и соответствующие значения, взятые из массива `values`. Если значение не указано для ключа, то в результирующем контейнере используется значение по умолчанию.
@@ -168,19 +183,28 @@ mapPopulateSeries(keys, values[, max])
**Аргументы**
-- `keys` — массив ключей [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
+Аргументами являются контейнер [Map](../../sql-reference/data-types/map.md) или два [массива](../../sql-reference/data-types/array.md#data-type-array), где первый массив представляет ключи, а второй массив содержит значения для каждого ключа.
+
+Сопоставленные массивы:
+
+- `keys` — массив ключей. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
- `values` — массив значений. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
+- `max` — максимальное значение ключа. Необязательный параметр. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
+
+или
+
+- `map` — контейнер `Map` с целочисленными ключами. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
-- Возвращает [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array): ключи отсортированные по порядку и значения соответствующих ключей.
+- В зависимости от аргумента возвращает контейнер [Map](../../sql-reference/data-types/map.md) или [кортеж](../../sql-reference/data-types/tuple.md#tuplet1-t2) из двух [массивов](../../sql-reference/data-types/array.md#data-type-array): ключи, отсортированные по порядку, и значения соответствующих ключей.
**Пример**
-Запрос:
+Запрос с сопоставленными массивами:
```sql
-select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
+SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Результат:
@@ -191,6 +215,20 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type
└──────────────────────────────┴───────────────────────────────────┘
```
+Запрос с контейнером `Map`:
+
+```sql
+SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
+```
+
+Результат:
+
+```text
+┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
+│ {1:10,2:0,3:0,4:0,5:20,6:0} │
+└─────────────────────────────────────────┘
+```
+
## mapContains {#mapcontains}
Определяет, содержит ли контейнер `map` ключ `key`.
@@ -319,4 +357,3 @@ SELECT mapValues(a) FROM test;
│ ['twelve','6.0'] │
└──────────────────┘
```
-
diff --git a/docs/ru/sql-reference/statements/describe-table.md b/docs/ru/sql-reference/statements/describe-table.md
index c66dbb66521..56c778f7c76 100644
--- a/docs/ru/sql-reference/statements/describe-table.md
+++ b/docs/ru/sql-reference/statements/describe-table.md
@@ -3,21 +3,66 @@ toc_priority: 42
toc_title: DESCRIBE
---
-# DESCRIBE TABLE Statement {#misc-describe-table}
+# DESCRIBE TABLE {#misc-describe-table}
+
+Возвращает описание столбцов таблицы.
+
+**Синтаксис**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
-Возвращает описание столбцов таблицы.
+Запрос `DESCRIBE` для каждого столбца таблицы возвращает строку со следующими значениями типа [String](../../sql-reference/data-types/string.md):
-Результат запроса содержит столбцы (все столбцы имеют тип String):
-
-- `name` — имя столбца таблицы;
-- `type`— тип столбца;
-- `default_type` — в каком виде задано [выражение для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Столбец содержит пустую строку, если значение по умолчанию не задано.
+- `name` — имя столбца;
+- `type` — тип столбца;
+- `default_type` — вид [выражения для значения по умолчанию](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` или `ALIAS`. Если значение по умолчанию не задано, то возвращается пустая строка;
- `default_expression` — значение, заданное в секции `DEFAULT`;
-- `comment_expression` — комментарий к столбцу.
+- `comment` — [комментарий](../../sql-reference/statements/alter/column.md#alter_comment-column);
+- `codec_expression` — [кодек](../../sql-reference/statements/create/table.md#codecs), который применяется к столбцу;
+- `ttl_expression` — выражение [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl);
+- `is_subcolumn` — флаг, который равен `1` для внутренних подстолбцов. Он появляется в результате, только если описание подстолбцов разрешено настройкой [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
-Вложенные структуры данных выводятся в «развёрнутом» виде. То есть, каждый столбец - по отдельности, с именем через точку.
+Каждый столбец [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) структур описывается отдельно. Перед его именем ставится имя родительского столбца с точкой.
+Чтобы отобразить внутренние подстолбцы других типов данных, нужно включить настройку [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
+**Пример**
+
+Запрос:
+
+``` sql
+CREATE TABLE describe_example (
+ id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
+ user Tuple (name String, age UInt8)
+) ENGINE = MergeTree() ORDER BY id;
+
+DESCRIBE TABLE describe_example;
+DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
+```
+
+Результат:
+
+``` text
+┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ id │ UInt64 │ │ │ │ │ │
+│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
+│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
+└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+Второй запрос дополнительно выводит информацию о подстолбцах:
+
+``` text
+┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
+│ id │ UInt64 │ │ │ │ │ │ 0 │
+│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
+│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
+│ user.name │ String │ │ │ │ │ │ 1 │
+│ user.age │ UInt8 │ │ │ │ │ │ 1 │
+└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
+```
+
+**См. также**
+
+- настройка [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns).
diff --git a/src/Bridge/LibraryBridgeHelper.cpp b/src/Bridge/LibraryBridgeHelper.cpp
index e5c6c09ba62..bd0604ec8e0 100644
--- a/src/Bridge/LibraryBridgeHelper.cpp
+++ b/src/Bridge/LibraryBridgeHelper.cpp
@@ -258,6 +258,7 @@ Pipe LibraryBridgeHelper::loadBase(const Poco::URI & uri, ReadWriteBufferFromHTT
0,
Poco::Net::HTTPBasicCredentials{},
DBMS_DEFAULT_BUFFER_SIZE,
+ getContext()->getReadSettings(),
ReadWriteBufferFromHTTP::HTTPHeaderEntries{});
auto source = FormatFactory::instance().getInput(LibraryBridgeHelper::DEFAULT_FORMAT, *read_buf_ptr, sample_block, getContext(), DEFAULT_BLOCK_SIZE);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9e1f25c21d9..87e6cc86d94 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -77,6 +77,7 @@ add_headers_and_sources(clickhouse_common_io IO)
add_headers_and_sources(clickhouse_common_io IO/S3)
list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp)
+add_headers_and_sources(dbms Disks/IO)
if (USE_SQLITE)
add_headers_and_sources(dbms Databases/SQLite)
endif()
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 44adbccc089..ee44b9eb927 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -250,6 +250,17 @@
M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \
\
+ M(RemoteFSReadMicroseconds, "Time of reading from remote filesystem.") \
+ M(RemoteFSReadBytes, "Read bytes from remote filesystem.") \
+ \
+ M(RemoteFSSeeks, "Total number of seeks for async buffer.") \
+ M(RemoteFSPrefetches, "Number of prefetches made with asynchronous reading from remote filesystem.") \
+ M(RemoteFSCancelledPrefetches, "Number of cancelled prefetches (because of seek).") \
+ M(RemoteFSUnusedPrefetches, "Number of prefetches pending at buffer destruction.") \
+ M(RemoteFSPrefetchedReads, "Number of reads from prefetched buffer.") \
+ M(RemoteFSUnprefetchedReads, "Number of reads from unprefetched buffer.") \
+ M(RemoteFSBuffers, "Number of buffers created for asynchronous reading from remote filesystem.") \
+ \
M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \
M(SleepFunctionMicroseconds, "Time spent sleeping due to a sleep function call.") \
\
diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp
index 4a583773b4b..c19e854dd45 100644
--- a/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/src/Compression/CachedCompressedReadBuffer.cpp
@@ -28,6 +28,12 @@ void CachedCompressedReadBuffer::initInput()
}
+void CachedCompressedReadBuffer::prefetch()
+{
+ file_in->prefetch();
+}
+
+
bool CachedCompressedReadBuffer::nextImpl()
{
/// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists.
diff --git a/src/Compression/CachedCompressedReadBuffer.h b/src/Compression/CachedCompressedReadBuffer.h
index bb24f699eed..16770e343cc 100644
--- a/src/Compression/CachedCompressedReadBuffer.h
+++ b/src/Compression/CachedCompressedReadBuffer.h
@@ -33,8 +33,11 @@ private:
UncompressedCache::MappedPtr owned_cell;
void initInput();
+
bool nextImpl() override;
+ void prefetch() override;
+
/// Passed into file_in.
ReadBufferFromFileBase::ProfileCallback profile_callback;
clockid_t clock_type {};
@@ -55,6 +58,18 @@ public:
profile_callback = profile_callback_;
clock_type = clock_type_;
}
+
+ void setReadUntilPosition(size_t position) override
+ {
+ if (file_in)
+ file_in->setReadUntilPosition(position);
+ }
+
+ void setReadUntilEnd() override
+ {
+ if (file_in)
+ file_in->setReadUntilEnd();
+ }
};
}
diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp
index b6da105cd88..1a70b27e9f4 100644
--- a/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/src/Compression/CompressedReadBufferFromFile.cpp
@@ -44,12 +44,6 @@ bool CompressedReadBufferFromFile::nextImpl()
}
-void CompressedReadBufferFromFile::prefetch()
-{
- file_in.prefetch();
-}
-
-
CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_)
: BufferWithOwnMemory(0), p_file_in(std::move(buf)), file_in(*p_file_in)
{
@@ -58,6 +52,12 @@ CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_ = false);
+ explicit CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_ = false);
/// Seek is lazy in some sense. We move position in compressed file_in to offset_in_compressed_file, but don't
/// read data into working_buffer and don't shift our position to offset_in_decompressed_block. Instead
@@ -58,6 +59,10 @@ public:
{
file_in.setProfileCallback(profile_callback_, clock_type_);
}
+
+ void setReadUntilPosition(size_t position) override { file_in.setReadUntilPosition(position); }
+
+ void setReadUntilEnd() override { file_in.setReadUntilEnd(); }
};
}
diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp
index 96d264fea1e..c3f14fe1449 100644
--- a/src/Compression/CompressionCodecEncrypted.cpp
+++ b/src/Compression/CompressionCodecEncrypted.cpp
@@ -1,3 +1,4 @@
+#include
#include
#include
#include
@@ -81,9 +82,11 @@ namespace ErrorCodes
namespace
{
-constexpr size_t tag_size = 16; /// AES-GCM-SIV always uses a tag of 16 bytes length
-constexpr size_t key_id_max_size = 8; /// Max size of varint.
-constexpr size_t nonce_max_size = 13; /// Nonce size and one byte to show if nonce in in text
+constexpr size_t tag_size = 16; /// AES-GCM-SIV always uses a tag of 16 bytes length
+constexpr size_t key_id_max_size = 8; /// Max size of varint.
+constexpr size_t nonce_max_size = 13; /// Nonce size and one byte to show if the nonce is in the text
+constexpr size_t actual_nonce_size = 12; /// Actual nonce size
+const String empty_nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", actual_nonce_size};
/// Get encryption/decryption algorithms.
auto getMethod(EncryptionMethod Method)
@@ -137,7 +140,7 @@ size_t encrypt(const std::string_view & plaintext, char * ciphertext_and_tag, En
EVP_AEAD_CTX_zero(&encrypt_ctx);
const int ok_init = EVP_AEAD_CTX_init(&encrypt_ctx, getMethod(method)(),
reinterpret_cast(key.data()), key.size(),
- 16 /* tag size */, nullptr);
+ tag_size, nullptr);
if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
@@ -145,7 +148,7 @@ size_t encrypt(const std::string_view & plaintext, char * ciphertext_and_tag, En
size_t out_len;
const int ok_open = EVP_AEAD_CTX_seal(&encrypt_ctx,
reinterpret_cast(ciphertext_and_tag),
- &out_len, plaintext.size() + 16,
+ &out_len, plaintext.size() + tag_size,
reinterpret_cast(nonce.data()), nonce.size(),
reinterpret_cast(plaintext.data()), plaintext.size(),
nullptr, 0);
@@ -167,7 +170,7 @@ size_t decrypt(const std::string_view & ciphertext, char * plaintext, Encryption
const int ok_init = EVP_AEAD_CTX_init(&decrypt_ctx, getMethod(method)(),
reinterpret_cast(key.data()), key.size(),
- 16 /* tag size */, nullptr);
+ tag_size, nullptr);
if (!ok_init)
throw Exception(lastErrorString(), ErrorCodes::OPENSSL_ERROR);
@@ -221,7 +224,7 @@ inline char* writeNonce(const String& nonce, char* dest)
{
/// If nonce consists of nul bytes, it shouldn't be in dest. Zero byte is the only byte that should be written.
/// Otherwise, 1 is written and data from nonce is copied
- if (nonce != String("\0\0\0\0\0\0\0\0\0\0\0\0", 12))
+ if (nonce != empty_nonce)
{
*dest = 1;
++dest;
@@ -246,15 +249,15 @@ inline const char* readNonce(String& nonce, const char* source)
/// If first is zero byte: move source and set zero-bytes nonce
if (!*source)
{
- nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
+ nonce = empty_nonce;
return ++source;
}
/// Move to next byte. Nonce will begin from there
++source;
/// Otherwise, use data from source in nonce
- nonce = {source, 12};
- source += 12;
+ nonce = {source, actual_nonce_size};
+ source += actual_nonce_size;
return source;
}
@@ -332,14 +335,14 @@ void CompressionCodecEncrypted::Configuration::loadImpl(
if (!new_params->keys_storage[method].contains(new_params->current_key_id[method]))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not found a key with the current ID {}", new_params->current_key_id[method]);
- /// Read nonce (in hex or in string). Its length should be 12 bytes.
+ /// Read nonce (in hex or in string). Its length should be 12 bytes (actual_nonce_size).
if (config.has(config_prefix + ".nonce_hex"))
new_params->nonce[method] = unhexKey(config.getString(config_prefix + ".nonce_hex"));
else
new_params->nonce[method] = config.getString(config_prefix + ".nonce", "");
- if (new_params->nonce[method].size() != 12 && !new_params->nonce[method].empty())
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got nonce with unexpected size {}, the size should be 12", new_params->nonce[method].size());
+ if (new_params->nonce[method].size() != actual_nonce_size && !new_params->nonce[method].empty())
+ throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got nonce with unexpected size {}, the size should be {}", new_params->nonce[method].size(), actual_nonce_size);
}
bool CompressionCodecEncrypted::Configuration::tryLoad(const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
@@ -399,7 +402,7 @@ void CompressionCodecEncrypted::Configuration::getCurrentKeyAndNonce(EncryptionM
/// This will lead to data loss.
nonce = current_params->nonce[method];
if (nonce.empty())
- nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", 12};
+ nonce = empty_nonce;
}
String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method, const UInt64 & key_id) const
@@ -448,8 +451,10 @@ UInt32 CompressionCodecEncrypted::getMaxCompressedDataSize(UInt32 uncompressed_s
UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
- // Generate an IV out of the data block and the key-generation
- // key. It is completely deterministic, but does not leak any
+ // The nonce, key and plaintext are used to generate the authentication tag
+ // and the message encryption key. AES-GCM-SIV authenticates the encoded additional data and the plaintext;
+ // the message_authentication_key is used for this purpose.
+ // The algorithm is completely deterministic, but does not leak any
// information about the data block except for equivalence of
// identical blocks (under the same key).
@@ -470,8 +475,7 @@ UInt32 CompressionCodecEncrypted::doCompressData(const char * source, UInt32 sou
char* ciphertext = writeNonce(nonce, ciphertext_with_nonce);
UInt64 nonce_size = ciphertext - ciphertext_with_nonce;
- // The IV will be used as an authentication tag. The ciphertext and the
- // tag will be written directly in the dest buffer.
+ // The ciphertext and the authentication tag will be written directly in the dest buffer.
size_t out_len = encrypt(plaintext, ciphertext, encryption_method, current_key, nonce);
/// Length of encrypted text should be equal to text length plus tag_size (which was added by algorithm).
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 7aade8e2d0f..e6f685a0650 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -75,7 +75,6 @@ class IColumn;
M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
- M(UInt64, http_max_single_read_retries, 4, "The maximum number of retries during single http read.", 0) \
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
@@ -508,6 +507,7 @@ class IColumn;
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(String, local_filesystem_read_method, "pread", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \
+ M(String, remote_filesystem_read_method, "read", "Method of reading data from remote filesystem, one of: read, read_threadpool.", 0) \
M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
M(Bool, remote_filesystem_read_prefetch, true, "Should use prefetching when reading data from remote filesystem.", 0) \
M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \
@@ -520,7 +520,7 @@ class IColumn;
M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \
M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \
\
- M(Int64, remote_fs_read_backoff_threshold, 10000, "Max wait time when trying to read data for remote disk", 0) \
+ M(Int64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \
M(Int64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \
\
M(Bool, force_remove_data_recursively_on_drop, false, "Recursively remove data on DROP query. Avoids 'Directory not empty' error, but may silently remove detached data", 0) \
diff --git a/src/Dictionaries/DictionarySource.cpp b/src/Dictionaries/DictionarySource.cpp
index c3f0ecf3cde..fa25dab6115 100644
--- a/src/Dictionaries/DictionarySource.cpp
+++ b/src/Dictionaries/DictionarySource.cpp
@@ -7,7 +7,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
- extern const int UNSUPPORTED_METHOD;
+ extern const int NO_SUCH_COLUMN_IN_TABLE;
}
bool DictionarySourceCoordinator::getKeyColumnsNextRangeToRead(ColumnsWithTypeAndName & key_columns, ColumnsWithTypeAndName & data_columns)
@@ -69,12 +69,6 @@ void DictionarySourceCoordinator::initialize(const Names & column_names)
}
}
}
- else
- {
- throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "No such column name {} in dictionary {}",
- column_name,
- dictionary->getDictionaryID().getNameForLogs());
- }
}
else
{
@@ -86,6 +80,11 @@ void DictionarySourceCoordinator::initialize(const Names & column_names)
column_with_type.type = attribute.type;
}
+ if (!column_with_type.type)
+ throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column name {} in dictionary {}",
+ column_name,
+ dictionary->getDictionaryID().getNameForLogs());
+
column_with_type.column = column_with_type.type->createColumn();
columns_with_type.emplace_back(std::move(column_with_type));
}
diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp
index 26ebde36f7d..29f503fc160 100644
--- a/src/Dictionaries/HTTPDictionarySource.cpp
+++ b/src/Dictionaries/HTTPDictionarySource.cpp
@@ -101,6 +101,7 @@ Pipe HTTPDictionarySource::loadAll()
0,
credentials,
DBMS_DEFAULT_BUFFER_SIZE,
+ context->getReadSettings(),
configuration.header_entries);
return createWrappedBuffer(std::move(in_ptr));
@@ -119,6 +120,7 @@ Pipe HTTPDictionarySource::loadUpdatedAll()
0,
credentials,
DBMS_DEFAULT_BUFFER_SIZE,
+ context->getReadSettings(),
configuration.header_entries);
return createWrappedBuffer(std::move(in_ptr));
@@ -146,6 +148,7 @@ Pipe HTTPDictionarySource::loadIds(const std::vector & ids)
0,
credentials,
DBMS_DEFAULT_BUFFER_SIZE,
+ context->getReadSettings(),
configuration.header_entries);
return createWrappedBuffer(std::move(in_ptr));
@@ -173,6 +176,7 @@ Pipe HTTPDictionarySource::loadKeys(const Columns & key_columns, const std::vect
0,
credentials,
DBMS_DEFAULT_BUFFER_SIZE,
+ context->getReadSettings(),
configuration.header_entries);
return createWrappedBuffer(std::move(in_ptr));
diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp
index dfb64cac0ef..f2b27c2c876 100644
--- a/src/Disks/DiskRestartProxy.cpp
+++ b/src/Disks/DiskRestartProxy.cpp
@@ -20,6 +20,10 @@ public:
RestartAwareReadBuffer(const DiskRestartProxy & disk, std::unique_ptr impl_)
: ReadBufferFromFileDecorator(std::move(impl_)), lock(disk.mutex) { }
+ void prefetch() override { impl->prefetch(); }
+
+ void setReadUntilPosition(size_t position) override { impl->setReadUntilPosition(position); }
+
private:
ReadLock lock;
};
diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp
index 320b4d179f3..55ea91c40c9 100644
--- a/src/Disks/DiskWebServer.cpp
+++ b/src/Disks/DiskWebServer.cpp
@@ -3,15 +3,18 @@
#include
#include
-#include
-#include
-#include
-
#include
#include
#include
#include
+#include
+#include
+#include
+#include
+#include
+#include
+
#include
#include
@@ -105,39 +108,6 @@ private:
};
-class ReadBufferFromWebServer final : public ReadIndirectBufferFromRemoteFS
-{
-public:
- ReadBufferFromWebServer(
- const String & uri_,
- RemoteMetadata metadata_,
- ContextPtr context_,
- size_t buf_size_,
- size_t backoff_threshold_,
- size_t max_tries_)
- : ReadIndirectBufferFromRemoteFS(metadata_)
- , uri(uri_)
- , context(context_)
- , buf_size(buf_size_)
- , backoff_threshold(backoff_threshold_)
- , max_tries(max_tries_)
- {
- }
-
- std::unique_ptr createReadBuffer(const String & path) override
- {
- return std::make_unique(fs::path(uri) / path, context, buf_size, backoff_threshold, max_tries);
- }
-
-private:
- String uri;
- ContextPtr context;
- size_t buf_size;
- size_t backoff_threshold;
- size_t max_tries;
-};
-
-
DiskWebServer::DiskWebServer(
const String & disk_name_,
const String & url_,
@@ -196,9 +166,20 @@ std::unique_ptr DiskWebServer::readFile(const String & p
RemoteMetadata meta(path, remote_path);
meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size));
- auto reader = std::make_unique(url, meta, getContext(),
- read_settings.remote_fs_buffer_size, read_settings.remote_fs_backoff_threshold, read_settings.remote_fs_backoff_max_tries);
- return std::make_unique(std::move(reader), min_bytes_for_seek);
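+ /// Either hand the gather buffer to the shared thread-pool reader (asynchronous reads)
+ /// or wrap it in the plain synchronous indirect buffer.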
+ bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool;
+
+ auto web_impl = std::make_unique(path, url, meta, getContext(), threadpool_read, read_settings);
+
+ if (threadpool_read)
+ {
+ auto reader = IDiskRemote::getThreadPoolReader();
+ return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek);
+ }
+ else
+ {
+ auto buf = std::make_unique(std::move(web_impl));
+ return std::make_unique(std::move(buf), min_bytes_for_seek);
+ }
}
diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp
index 3e137056377..9071ce1d139 100644
--- a/src/Disks/HDFS/DiskHDFS.cpp
+++ b/src/Disks/HDFS/DiskHDFS.cpp
@@ -1,10 +1,14 @@
#include
-#include
-#include
#include
-#include
-#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
#include
#include
@@ -48,37 +52,6 @@ private:
};
-/// Reads data from HDFS using stored paths in metadata.
-class ReadIndirectBufferFromHDFS final : public ReadIndirectBufferFromRemoteFS
-{
-public:
- ReadIndirectBufferFromHDFS(
- const Poco::Util::AbstractConfiguration & config_,
- const String & hdfs_uri_,
- DiskHDFS::Metadata metadata_,
- size_t buf_size_)
- : ReadIndirectBufferFromRemoteFS(metadata_)
- , config(config_)
- , buf_size(buf_size_)
- {
- const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2);
- hdfs_directory = hdfs_uri_.substr(begin_of_path);
- hdfs_uri = hdfs_uri_.substr(0, begin_of_path);
- }
-
- std::unique_ptr createReadBuffer(const String & path) override
- {
- return std::make_unique(hdfs_uri, hdfs_directory + path, config, buf_size);
- }
-
-private:
- const Poco::Util::AbstractConfiguration & config;
- String hdfs_uri;
- String hdfs_directory;
- size_t buf_size;
-};
-
-
DiskHDFS::DiskHDFS(
const String & disk_name_,
const String & hdfs_root_path_,
@@ -102,8 +75,18 @@ std::unique_ptr DiskHDFS::readFile(const String & path,
"Read from file by path: {}. Existing HDFS objects: {}",
backQuote(metadata_path + path), metadata.remote_fs_objects.size());
- auto reader = std::make_unique(config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size);
- return std::make_unique(std::move(reader), settings->min_bytes_for_seek);
+ auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size);
+
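+ /// Same choice as for other remote disks: asynchronous thread-pool reads or a plain synchronous indirect buffer.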
+ if (read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool)
+ {
+ auto reader = getThreadPoolReader();
+ return std::make_unique(reader, read_settings, std::move(hdfs_impl));
+ }
+ else
+ {
+ auto buf = std::make_unique(std::move(hdfs_impl));
+ return std::make_unique(std::move(buf), settings->min_bytes_for_seek);
+ }
}
diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp
index 1a6e4eb73a1..cf1baafce6c 100644
--- a/src/Disks/IDiskRemote.cpp
+++ b/src/Disks/IDiskRemote.cpp
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
namespace DB
@@ -496,4 +497,13 @@ String IDiskRemote::getUniqueId(const String & path) const
return id;
}
+
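+/// Returns a process-wide reader shared by all remote disks:
+/// a lazily created thread pool of 50 threads with a queue of up to 1000000 pending read requests.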
+AsynchronousReaderPtr IDiskRemote::getThreadPoolReader()
+{
+ constexpr size_t pool_size = 50;
+ constexpr size_t queue_size = 1000000;
+ static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size);
+ return reader;
+}
+
}
diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h
index 7453fce747e..50c8d73c048 100644
--- a/src/Disks/IDiskRemote.h
+++ b/src/Disks/IDiskRemote.h
@@ -3,8 +3,8 @@
#include
#include
-#include "Disks/DiskFactory.h"
-#include "Disks/Executor.h"
+#include
+#include
#include
#include
#include
@@ -33,6 +33,10 @@ protected:
using RemoteFSPathKeeperPtr = std::shared_ptr;
+class IAsynchronousReader;
+using AsynchronousReaderPtr = std::shared_ptr;
+
+
/// Base Disk class for remote FS's, which are not posix-compatible (DiskS3 and DiskHDFS)
class IDiskRemote : public IDisk
{
@@ -125,6 +129,8 @@ public:
virtual RemoteFSPathKeeperPtr createFSPathKeeper() const = 0;
+ static AsynchronousReaderPtr getThreadPoolReader();
+
protected:
Poco::Logger * log;
const String name;
diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
new file mode 100644
index 00000000000..c283e0ea159
--- /dev/null
+++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
@@ -0,0 +1,262 @@
+#include "AsynchronousReadIndirectBufferFromRemoteFS.h"
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace CurrentMetrics
+{
+ extern const Metric AsynchronousReadWait;
+}
+
+namespace ProfileEvents
+{
+ extern const Event AsynchronousReadWaitMicroseconds;
+ extern const Event RemoteFSSeeks;
+ extern const Event RemoteFSPrefetches;
+ extern const Event RemoteFSCancelledPrefetches;
+ extern const Event RemoteFSUnusedPrefetches;
+ extern const Event RemoteFSPrefetchedReads;
+ extern const Event RemoteFSUnprefetchedReads;
+ extern const Event RemoteFSBuffers;
+}
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int LOGICAL_ERROR;
+ extern const int CANNOT_SEEK_THROUGH_FILE;
+}
+
+
+AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRemoteFS(
+ AsynchronousReaderPtr reader_,
+ const ReadSettings & settings_,
+ std::shared_ptr impl_,
+ size_t min_bytes_for_seek_)
+ : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0)
+ , reader(reader_)
+ , priority(settings_.priority)
+ , impl(impl_)
+ , prefetch_buffer(settings_.remote_fs_buffer_size)
+ , min_bytes_for_seek(min_bytes_for_seek_)
+ , must_read_until_position(settings_.must_read_until_position)
+{
+ ProfileEvents::increment(ProfileEvents::RemoteFSBuffers);
+}
+
+
+String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const
+{
+ return impl->getFileName();
+}
+
+
+bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead()
+{
+ /// Position is set only for MergeTree tables.
+ if (read_until_position)
+ {
+ /// Everything is already read.
+ if (file_offset_of_buffer_end == read_until_position)
+ return false;
+
+ if (file_offset_of_buffer_end > read_until_position)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})",
+ file_offset_of_buffer_end, read_until_position);
+ }
+ else if (must_read_until_position)
+ throw Exception(ErrorCodes::LOGICAL_ERROR,
+ "Reading for MergeTree family tables must be done with last position boundary");
+
+ return true;
+}
+
+
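+/// Build an asynchronous read request: read `size` bytes into `data` starting from the current
+/// file offset, skipping any bytes deferred by a lazy seek, and submit it to the reader.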
+std::future AsynchronousReadIndirectBufferFromRemoteFS::readInto(char * data, size_t size)
+{
+ IAsynchronousReader::Request request;
+ request.descriptor = std::make_shared(impl);
+ request.buf = data;
+ request.size = size;
+ request.offset = file_offset_of_buffer_end;
+ request.priority = priority;
+
+ if (bytes_to_ignore)
+ {
+ request.ignore = bytes_to_ignore;
+ bytes_to_ignore = 0;
+ }
+ return reader->submit(request);
+}
+
+
+void AsynchronousReadIndirectBufferFromRemoteFS::prefetch()
+{
+ if (prefetch_future.valid())
+ return;
+
+ /// Check the boundary, which was set in setReadUntilPosition().
+ if (!hasPendingDataToRead())
+ return;
+
+ /// Prefetch even in case hasPendingData() == true.
+ prefetch_future = readInto(prefetch_buffer.data(), prefetch_buffer.size());
+ ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches);
+}
+
+
+void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t position)
+{
+ if (prefetch_future.valid())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is still in progress, cannot set read-until position");
+
+ read_until_position = position;
+ impl->setReadUntilPosition(read_until_position);
+}
+
+
+void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilEnd()
+{
+ if (prefetch_future.valid())
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is still in progress, cannot set read-until-end");
+
+ read_until_position = impl->getFileSize();
+ impl->setReadUntilPosition(read_until_position);
+}
+
+
+bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl()
+{
+ if (!hasPendingDataToRead())
+ return false;
+
+ size_t size = 0;
+
+ if (prefetch_future.valid())
+ {
+ ProfileEvents::increment(ProfileEvents::RemoteFSPrefetchedReads);
+
+ CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait};
+ Stopwatch watch;
+ {
+ size = prefetch_future.get();
+ if (size)
+ {
+ memory.swap(prefetch_buffer);
+ set(memory.data(), memory.size());
+ working_buffer.resize(size);
+ file_offset_of_buffer_end += size;
+ }
+ }
+
+ watch.stop();
+ ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds());
+ }
+ else
+ {
+ ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads);
+ size = readInto(memory.data(), memory.size()).get();
+
+ if (size)
+ {
+ set(memory.data(), memory.size());
+ working_buffer.resize(size);
+ file_offset_of_buffer_end += size;
+ }
+ }
+
+ prefetch_future = {};
+ return size;
+}
+
+
+off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
+{
+ ProfileEvents::increment(ProfileEvents::RemoteFSSeeks);
+
+ if (whence == SEEK_CUR)
+ {
+ /// If position within current working buffer - shift pos.
+ if (!working_buffer.empty() && static_cast(getPosition() + offset_) < file_offset_of_buffer_end)
+ {
+ pos += offset_;
+ return getPosition();
+ }
+ else
+ {
+ file_offset_of_buffer_end += offset_;
+ }
+ }
+ else if (whence == SEEK_SET)
+ {
+ /// If position is within current working buffer - shift pos.
+ if (!working_buffer.empty()
+ && static_cast(offset_) >= file_offset_of_buffer_end - working_buffer.size()
+ && size_t(offset_) < file_offset_of_buffer_end)
+ {
+ pos = working_buffer.end() - (file_offset_of_buffer_end - offset_);
+
+ assert(pos >= working_buffer.begin());
+ assert(pos <= working_buffer.end());
+
+ return getPosition();
+ }
+ else
+ {
+ file_offset_of_buffer_end = offset_;
+ }
+ }
+ else
+ throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+ if (prefetch_future.valid())
+ {
+ ProfileEvents::increment(ProfileEvents::RemoteFSCancelledPrefetches);
+ prefetch_future.wait();
+ prefetch_future = {};
+ }
+
+ pos = working_buffer.end();
+
+ /// Note: we read in range [file_offset_of_buffer_end, read_until_position).
+ if (file_offset_of_buffer_end < read_until_position
+ && static_cast(file_offset_of_buffer_end) >= getPosition()
+ && static_cast(file_offset_of_buffer_end) < getPosition() + static_cast(min_bytes_for_seek))
+ {
+ /**
+ * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
+ */
+ bytes_to_ignore = file_offset_of_buffer_end - getPosition();
+ }
+ else
+ {
+ impl->reset();
+ }
+
+ return file_offset_of_buffer_end;
+}
+
+
+void AsynchronousReadIndirectBufferFromRemoteFS::finalize()
+{
+ if (prefetch_future.valid())
+ {
+ ProfileEvents::increment(ProfileEvents::RemoteFSUnusedPrefetches);
+ prefetch_future.wait();
+ prefetch_future = {};
+ }
+}
+
+
+AsynchronousReadIndirectBufferFromRemoteFS::~AsynchronousReadIndirectBufferFromRemoteFS()
+{
+ finalize();
+}
+
+}
diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h
new file mode 100644
index 00000000000..d8fad08bc8a
--- /dev/null
+++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#if !defined(ARCADIA_BUILD)
+#include
+#endif
+
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class ReadBufferFromRemoteFSGather;
+struct ReadSettings;
+
+/**
+ * Reads data from S3/HDFS/Web using stored paths in metadata.
+ * This class is an asynchronous version of ReadIndirectBufferFromRemoteFS.
+ *
+ * Buffers chain for diskS3:
+ * AsynchronousReadIndirectBufferFromRemoteFS -> ReadBufferFromRemoteFSGather ->
+ * -> ReadBufferFromS3 -> ReadBufferFromIStream.
+ *
+ * Buffers chain for diskWeb:
+ * AsynchronousReadIndirectBufferFromRemoteFS -> ReadBufferFromRemoteFSGather ->
+ * -> ReadBufferFromWebServer -> ReadWriteBufferFromHTTP -> ReadBufferFromIStream.
+ *
+ * We pass either `memory` or `prefetch_buffer` through this whole chain and get it back.
+ */
+class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
+{
+public:
+ explicit AsynchronousReadIndirectBufferFromRemoteFS(
+ AsynchronousReaderPtr reader_, const ReadSettings & settings_,
+ std::shared_ptr impl_,
+ size_t min_bytes_for_seek = 1024 * 1024);
+
+ ~AsynchronousReadIndirectBufferFromRemoteFS() override;
+
+ off_t seek(off_t offset_, int whence) override;
+
+ off_t getPosition() override { return file_offset_of_buffer_end - available(); }
+
+ String getFileName() const override;
+
+ void prefetch() override;
+
+ void setReadUntilPosition(size_t position) override;
+
+ void setReadUntilEnd() override;
+
+private:
+ bool nextImpl() override;
+
+ void finalize();
+
+ bool hasPendingDataToRead();
+
+ std::future readInto(char * data, size_t size);
+
+ AsynchronousReaderPtr reader;
+
+ Int32 priority;
+
+ std::shared_ptr impl;
+
+ std::future prefetch_future;
+
+ size_t file_offset_of_buffer_end = 0;
+
+ Memory<> prefetch_buffer;
+
+ size_t min_bytes_for_seek;
+
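+ /// Number of bytes to skip before the next read; set by a short forward seek and applied lazily.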
+ size_t bytes_to_ignore = 0;
+
+ size_t read_until_position = 0;
+
+ bool must_read_until_position;
+};
+
+}
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
new file mode 100644
index 00000000000..a9a94fa63e2
--- /dev/null
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
@@ -0,0 +1,182 @@
+#include "ReadBufferFromRemoteFSGather.h"
+
+#include
+#include
+#include
+
+#if USE_AWS_S3
+#include
+#endif
+
+#if USE_HDFS
+#include
+#endif
+
+#include
+#include
+#include
+
+namespace fs = std::filesystem;
+
+namespace DB
+{
+
+#if USE_AWS_S3
+SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const String & path, size_t read_until_position_) const
+{
+ return std::make_unique(client_ptr, bucket,
+ fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, settings, threadpool_read, read_until_position_);
+}
+#endif
+
+
+SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBuffer(const String & path, size_t read_until_position_) const
+{
+ return std::make_unique(fs::path(uri) / path, context, settings, threadpool_read, read_until_position_);
+}
+
+
+#if USE_HDFS
+SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const String & path, size_t read_until_position_) const
+{
+ return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, buf_size, read_until_position_);
+}
+#endif
+
+
+ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_)
+ : ReadBuffer(nullptr, 0)
+ , metadata(metadata_)
+ , canonical_path(path_)
+{
+}
+
+
+size_t ReadBufferFromRemoteFSGather::readInto(char * data, size_t size, size_t offset, size_t ignore)
+{
+ /**
+ * Set `data` to current working and internal buffers.
+ * Internal buffer with size `size`. Working buffer with size 0.
+ */
+ set(data, size);
+
+ absolute_position = offset;
+ bytes_to_ignore = ignore;
+
+ auto result = nextImpl();
+ bytes_to_ignore = 0;
+
+ if (result)
+ return working_buffer.size();
+
+ return 0;
+}
+
+
+void ReadBufferFromRemoteFSGather::initialize()
+{
+ /// One clickhouse file can be split into multiple files in remote fs.
+ auto current_buf_offset = absolute_position;
+ for (size_t i = 0; i < metadata.remote_fs_objects.size(); ++i)
+ {
+ const auto & [file_path, size] = metadata.remote_fs_objects[i];
+
+ if (size > current_buf_offset)
+ {
+ /// Do not create a new buffer if we already have what we need.
+ if (!current_buf || current_buf_idx != i)
+ {
+ current_buf = createImplementationBuffer(file_path, read_until_position);
+ current_buf_idx = i;
+ }
+
+ current_buf->seek(current_buf_offset, SEEK_SET);
+ return;
+ }
+
+ current_buf_offset -= size;
+ }
+ current_buf_idx = metadata.remote_fs_objects.size();
+ current_buf = nullptr;
+}
+
+
+bool ReadBufferFromRemoteFSGather::nextImpl()
+{
+ /// Find first available buffer that fits to given offset.
+ if (!current_buf)
+ initialize();
+
+ /// If current buffer has remaining data - use it.
+ if (current_buf)
+ {
+ if (readImpl())
+ return true;
+ }
+ else
+ return false;
+
+ /// If there is no available buffers - nothing to read.
+ if (current_buf_idx + 1 >= metadata.remote_fs_objects.size())
+ return false;
+
+ ++current_buf_idx;
+
+ const auto & current_path = metadata.remote_fs_objects[current_buf_idx].first;
+ current_buf = createImplementationBuffer(current_path, read_until_position);
+
+ return readImpl();
+}
+
+
+bool ReadBufferFromRemoteFSGather::readImpl()
+{
+ swap(*current_buf);
+
+ /**
+ * Lazy seek is performed here.
+ * In asynchronous buffer when seeking to offset in range [pos, pos + min_bytes_for_seek]
+ * we save how many bytes need to be ignored (new_offset - position() bytes).
+ */
+ if (bytes_to_ignore)
+ current_buf->ignore(bytes_to_ignore);
+
+ auto result = current_buf->next();
+
+ swap(*current_buf);
+
+ if (result)
+ absolute_position += working_buffer.size();
+
+ return result;
+}
+
+
+void ReadBufferFromRemoteFSGather::setReadUntilPosition(size_t position)
+{
+ read_until_position = position;
+ reset();
+}
+
+
+void ReadBufferFromRemoteFSGather::reset()
+{
+ current_buf.reset();
+}
+
+
+String ReadBufferFromRemoteFSGather::getFileName() const
+{
+ return canonical_path;
+}
+
+
+size_t ReadBufferFromRemoteFSGather::getFileSize() const
+{
+ size_t size = 0;
+ for (const auto & object : metadata.remote_fs_objects)
+ size += object.second;
+ return size;
+}
+
+}
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
new file mode 100644
index 00000000000..5bc7d4e4819
--- /dev/null
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#if !defined(ARCADIA_BUILD)
+#include
+#endif
+
+#include
+#include
+#include
+
+namespace Aws
+{
+namespace S3
+{
+class S3Client;
+}
+}
+
+namespace DB
+{
+
+/**
+ * Remote disk might need to split one clickhouse file into multiple files in remote fs.
+ * This class works like a proxy to allow transition from one file into multiple.
+ */
+class ReadBufferFromRemoteFSGather : public ReadBuffer
+{
+friend class ReadIndirectBufferFromRemoteFS;
+
+public:
+ explicit ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_);
+
+ String getFileName() const;
+
+ void reset();
+
+ void setReadUntilPosition(size_t position) override;
+
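+ /// Read up to `size` bytes into the external buffer `data`, starting at absolute `offset`
+ /// and skipping `ignore` bytes first. Returns the number of bytes read, 0 if nothing is left.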
+ size_t readInto(char * data, size_t size, size_t offset, size_t ignore = 0);
+
+ size_t getFileSize() const;
+
+protected:
+ virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const = 0;
+
+ RemoteMetadata metadata;
+
+private:
+ bool nextImpl() override;
+
+ void initialize();
+
+ bool readImpl();
+
+ SeekableReadBufferPtr current_buf;
+
+ size_t current_buf_idx = 0;
+
+ size_t absolute_position = 0;
+
+ size_t bytes_to_ignore = 0;
+
+ size_t read_until_position = 0;
+
+ String canonical_path;
+};
+
+
+#if USE_AWS_S3
+/// Reads data from S3 using stored paths in metadata.
+class ReadBufferFromS3Gather final : public ReadBufferFromRemoteFSGather
+{
+public:
+ ReadBufferFromS3Gather(
+ const String & path_,
+ std::shared_ptr client_ptr_,
+ const String & bucket_,
+ IDiskRemote::Metadata metadata_,
+ size_t max_single_read_retries_,
+ const ReadSettings & settings_,
+ bool threadpool_read_ = false)
+ : ReadBufferFromRemoteFSGather(metadata_, path_)
+ , client_ptr(std::move(client_ptr_))
+ , bucket(bucket_)
+ , max_single_read_retries(max_single_read_retries_)
+ , settings(settings_)
+ , threadpool_read(threadpool_read_)
+ {
+ }
+
+ SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override;
+
+private:
+ std::shared_ptr client_ptr;
+ String bucket;
+ UInt64 max_single_read_retries;
+ ReadSettings settings;
+ bool threadpool_read;
+};
+#endif
+
+
+class ReadBufferFromWebServerGather final : public ReadBufferFromRemoteFSGather
+{
+public:
+ ReadBufferFromWebServerGather(
+ const String & path_,
+ const String & uri_,
+ RemoteMetadata metadata_,
+ ContextPtr context_,
+ size_t threadpool_read_,
+ const ReadSettings & settings_)
+ : ReadBufferFromRemoteFSGather(metadata_, path_)
+ , uri(uri_)
+ , context(context_)
+ , threadpool_read(threadpool_read_)
+ , settings(settings_)
+ {
+ }
+
+ SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override;
+
+private:
+ String uri;
+ ContextPtr context;
+ bool threadpool_read;
+ ReadSettings settings;
+};
+
+
+#if USE_HDFS
+/// Reads data from HDFS using stored paths in metadata.
+class ReadBufferFromHDFSGather final : public ReadBufferFromRemoteFSGather
+{
+public:
+ ReadBufferFromHDFSGather(
+ const String & path_,
+ const Poco::Util::AbstractConfiguration & config_,
+ const String & hdfs_uri_,
+ IDiskRemote::Metadata metadata_,
+ size_t buf_size_)
+ : ReadBufferFromRemoteFSGather(metadata_, path_)
+ , config(config_)
+ , buf_size(buf_size_)
+ {
+ const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2);
+ hdfs_directory = hdfs_uri_.substr(begin_of_path);
+ hdfs_uri = hdfs_uri_.substr(0, begin_of_path);
+ }
+
+ SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override;
+
+private:
+ const Poco::Util::AbstractConfiguration & config;
+ String hdfs_uri;
+ String hdfs_directory;
+ size_t buf_size;
+};
+#endif
+
+}
diff --git a/src/Disks/IO/ReadBufferFromWebServer.cpp b/src/Disks/IO/ReadBufferFromWebServer.cpp
new file mode 100644
index 00000000000..bda20f78e79
--- /dev/null
+++ b/src/Disks/IO/ReadBufferFromWebServer.cpp
@@ -0,0 +1,198 @@
+#include "ReadBufferFromWebServer.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_SEEK_THROUGH_FILE;
+ extern const int SEEK_POSITION_OUT_OF_BOUND;
+ extern const int LOGICAL_ERROR;
+}
+
+
+static constexpr size_t HTTP_MAX_TRIES = 10;
+static constexpr size_t WAIT_INIT = 100;
+
+ReadBufferFromWebServer::ReadBufferFromWebServer(
+ const String & url_,
+ ContextPtr context_,
+ const ReadSettings & settings_,
+ bool use_external_buffer_,
+ size_t last_offset_)
+ : SeekableReadBuffer(nullptr, 0)
+ , log(&Poco::Logger::get("ReadBufferFromWebServer"))
+ , context(context_)
+ , url(url_)
+ , buf_size(settings_.remote_fs_buffer_size)
+ , read_settings(settings_)
+ , use_external_buffer(use_external_buffer_)
+ , last_offset(last_offset_)
+{
+}
+
+
+std::unique_ptr ReadBufferFromWebServer::initialize()
+{
+ Poco::URI uri(url);
+
+ ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
+
+ if (last_offset)
+ {
+ if (last_offset < offset)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, last_offset - 1);
+
+ headers.emplace_back(std::make_pair("Range", fmt::format("bytes={}-{}", offset, last_offset - 1)));
+ LOG_DEBUG(log, "Reading with range: {}-{}", offset, last_offset);
+ }
+ else
+ {
+ headers.emplace_back(std::make_pair("Range", fmt::format("bytes={}-", offset)));
+ LOG_DEBUG(log, "Reading from offset: {}", offset);
+ }
+
+ const auto & settings = context->getSettingsRef();
+ const auto & config = context->getConfigRef();
+ Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 20), 0};
+
+ return std::make_unique(
+ uri,
+ Poco::Net::HTTPRequest::HTTP_GET,
+ ReadWriteBufferFromHTTP::OutStreamCallback(),
+ ConnectionTimeouts(std::max(Poco::Timespan(settings.http_connection_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)),
+ settings.http_send_timeout,
+ std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)),
+ settings.tcp_keep_alive_timeout,
+ http_keep_alive_timeout),
+ 0,
+ Poco::Net::HTTPBasicCredentials{},
+ buf_size,
+ read_settings,
+ headers,
+ context->getRemoteHostFilter(),
+ use_external_buffer);
+}
+
+
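+/// Open the HTTP read buffer with up to HTTP_MAX_TRIES attempts,
+/// doubling the wait between attempts starting from WAIT_INIT milliseconds.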
+void ReadBufferFromWebServer::initializeWithRetry()
+{
+ /// Initialize impl with retry.
+ size_t milliseconds_to_wait = WAIT_INIT;
+ for (size_t i = 0; i < HTTP_MAX_TRIES; ++i)
+ {
+ try
+ {
+ impl = initialize();
+
+ if (use_external_buffer)
+ {
+ /**
+ * See comment 30 lines lower.
+ */
+ impl->set(internal_buffer.begin(), internal_buffer.size());
+ assert(working_buffer.begin() != nullptr);
+ assert(!internal_buffer.empty());
+ }
+
+ break;
+ }
+ catch (Poco::Exception & e)
+ {
+ if (i == HTTP_MAX_TRIES - 1)
+ throw;
+
+ LOG_ERROR(&Poco::Logger::get("ReadBufferFromWeb"), "Error: {}, code: {}", e.what(), e.code());
+ sleepForMilliseconds(milliseconds_to_wait);
+ milliseconds_to_wait *= 2;
+ }
+ }
+}
+
+
+bool ReadBufferFromWebServer::nextImpl()
+{
+ if (last_offset)
+ {
+ if (last_offset == offset)
+ return false;
+
+ if (last_offset < offset)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, last_offset - 1);
+ }
+
+ if (impl)
+ {
+ if (use_external_buffer)
+ {
+ /**
+ * use_external_buffer -- means we read into the buffer which
+ * was passed to us from somewhere else. We do not check whether
+ * previously returned buffer was read or not, because this branch
+ * means we are prefetching data, each nextImpl() call we can fill
+ * a different buffer.
+ */
+ impl->set(internal_buffer.begin(), internal_buffer.size());
+ assert(working_buffer.begin() != nullptr);
+ assert(!internal_buffer.empty());
+ }
+ else
+ {
+ /**
+ * impl was initialized before, pass position() to it to make
+ * sure there is no pending data which was not read, because
+ * this branch means we read sequentially.
+ */
+ impl->position() = position();
+ assert(!impl->hasPendingData());
+ }
+ }
+ else
+ {
+ initializeWithRetry();
+ }
+
+ auto result = impl->next();
+ if (result)
+ {
+ BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
+ offset += working_buffer.size();
+ }
+
+ return result;
+}
+
+
+off_t ReadBufferFromWebServer::seek(off_t offset_, int whence)
+{
+ if (impl)
+ throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek is allowed only before first read attempt from the buffer");
+
+ if (whence != SEEK_SET)
+ throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed");
+
+ if (offset_ < 0)
+ throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", std::to_string(offset_));
+
+ offset = offset_;
+
+ return offset;
+}
+
+
+off_t ReadBufferFromWebServer::getPosition()
+{
+ return offset - available();
+}
+
+}
diff --git a/src/Disks/ReadIndirectBufferFromWebServer.h b/src/Disks/IO/ReadBufferFromWebServer.h
similarity index 56%
rename from src/Disks/ReadIndirectBufferFromWebServer.h
rename to src/Disks/IO/ReadBufferFromWebServer.h
index 04bb155f83b..1ffb8589392 100644
--- a/src/Disks/ReadIndirectBufferFromWebServer.h
+++ b/src/Disks/IO/ReadBufferFromWebServer.h
@@ -2,6 +2,7 @@
#include
#include
+#include
#include
@@ -11,15 +12,16 @@ namespace DB
/* Read buffer, which reads via http, but is used as ReadBufferFromFileBase.
* Used to read files, hosted on a web server with static files.
*
- * Usage: ReadIndirectBufferFromRemoteFS -> SeekAvoidingReadBuffer -> ReadIndirectBufferFromWebServer -> ReadWriteBufferFromHTTP.
+ * Usage: ReadIndirectBufferFromRemoteFS -> SeekAvoidingReadBuffer -> ReadBufferFromWebServer -> ReadWriteBufferFromHTTP.
*/
-class ReadIndirectBufferFromWebServer : public BufferWithOwnMemory
+class ReadBufferFromWebServer : public SeekableReadBuffer
{
public:
- explicit ReadIndirectBufferFromWebServer(const String & url_,
- ContextPtr context_,
- size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE,
- size_t backoff_threshold_ = 10000, size_t max_tries_ = 4);
+ explicit ReadBufferFromWebServer(
+ const String & url_, ContextPtr context_,
+ const ReadSettings & settings_ = {},
+ bool use_external_buffer_ = false,
+ size_t last_offset = 0);
bool nextImpl() override;
@@ -30,6 +32,8 @@ public:
private:
std::unique_ptr initialize();
+ void initializeWithRetry();
+
Poco::Logger * log;
ContextPtr context;
@@ -40,8 +44,11 @@ private:
off_t offset = 0;
- size_t backoff_threshold_ms;
- size_t max_tries;
+ ReadSettings read_settings;
+
+ bool use_external_buffer;
+
+ off_t last_offset = 0;
};
}
diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp
new file mode 100644
index 00000000000..112124d9fd7
--- /dev/null
+++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp
@@ -0,0 +1,85 @@
+#include "ReadIndirectBufferFromRemoteFS.h"
+
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+ extern const int CANNOT_SEEK_THROUGH_FILE;
+}
+
+
+ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS(
+ std::shared_ptr impl_) : impl(std::move(impl_))
+{
+}
+
+
+off_t ReadIndirectBufferFromRemoteFS::getPosition()
+{
+ return impl->absolute_position - available();
+}
+
+
+String ReadIndirectBufferFromRemoteFS::getFileName() const
+{
+ return impl->getFileName();
+}
+
+
+off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
+{
+ if (whence == SEEK_CUR)
+ {
+ /// If position within current working buffer - shift pos.
+ if (!working_buffer.empty() && size_t(getPosition() + offset_) < impl->absolute_position)
+ {
+ pos += offset_;
+ return getPosition();
+ }
+ else
+ {
+ impl->absolute_position += offset_;
+ }
+ }
+ else if (whence == SEEK_SET)
+ {
+ /// If position within current working buffer - shift pos.
+ if (!working_buffer.empty()
+ && size_t(offset_) >= impl->absolute_position - working_buffer.size()
+ && size_t(offset_) < impl->absolute_position)
+ {
+ pos = working_buffer.end() - (impl->absolute_position - offset_);
+ return getPosition();
+ }
+ else
+ {
+ impl->absolute_position = offset_;
+ }
+ }
+ else
+ throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+ impl->reset();
+ pos = working_buffer.end();
+
+ return impl->absolute_position;
+}
+
+
+bool ReadIndirectBufferFromRemoteFS::nextImpl()
+{
+ /// Transfer current position and working_buffer to actual ReadBuffer
+ swap(*impl);
+ /// Position and working_buffer will be updated in next() call
+ auto result = impl->next();
+ /// and assigned to current buffer.
+ swap(*impl);
+
+ return result;
+}
+
+}
diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h
new file mode 100644
index 00000000000..0c8b1b4dd21
--- /dev/null
+++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class ReadBufferFromRemoteFSGather;
+
+/**
+ * Reads data from S3/HDFS/Web using stored paths in metadata.
+ * There is an asynchronous version of this class -- AsynchronousReadIndirectBufferFromRemoteFS.
+ */
+class ReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
+{
+
+public:
+ explicit ReadIndirectBufferFromRemoteFS(std::shared_ptr impl_);
+
+ off_t seek(off_t offset_, int whence) override;
+
+ off_t getPosition() override;
+
+ String getFileName() const override;
+
+private:
+ bool nextImpl() override;
+
+ std::shared_ptr<ReadBufferFromRemoteFSGather> impl;
+};
+
+}
diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp
new file mode 100644
index 00000000000..945b2d3eb7e
--- /dev/null
+++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp
@@ -0,0 +1,68 @@
+#include "ThreadPoolRemoteFSReader.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+
+namespace ProfileEvents
+{
+ extern const Event RemoteFSReadMicroseconds;
+ extern const Event RemoteFSReadBytes;
+}
+
+namespace CurrentMetrics
+{
+ extern const Metric Read;
+}
+
+namespace DB
+{
+
+size_t ThreadPoolRemoteFSReader::RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore)
+{
+ return reader->readInto(data, size, offset, ignore);
+}
+
+
+ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_)
+ : pool(pool_size, pool_size, queue_size_)
+{
+}
+
+
+std::future ThreadPoolRemoteFSReader::submit(Request request)
+{
+ auto task = std::make_shared>([request]
+ {
+ setThreadName("ThreadPoolRemoteFSRead");
+ CurrentMetrics::Increment metric_increment{CurrentMetrics::Read};
+ auto * remote_fs_fd = assert_cast<RemoteFSFileDescriptor *>(request.descriptor.get());
+
+ Stopwatch watch(CLOCK_MONOTONIC);
+ auto bytes_read = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
+ watch.stop();
+
+ ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds());
+ ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read);
+
+ return bytes_read;
+ });
+
+ auto future = task->get_future();
+
+ /// ThreadPool uses "bigger value -- higher priority", while Request::priority uses "lower value -- higher priority", hence the negation.
+ pool.scheduleOrThrow([task]{ (*task)(); }, -request.priority);
+
+ return future;
+}
+}
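
submit() above follows a common pattern: wrap the job in a std::packaged_task, return its future to the caller, and negate the priority because the pool and the reader interface rank priorities in opposite directions. A self-contained sketch of that shape, assuming only the standard library — `ToyPool` is a hypothetical stand-in for ClickHouse's ThreadPool and simply runs jobs inline:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <future>
    #include <iostream>
    #include <memory>

    // Hypothetical stand-in for ThreadPool::scheduleOrThrow: runs the job inline.
    // A real pool would enqueue jobs and run them by descending `pool_priority`.
    struct ToyPool
    {
        void scheduleOrThrow(std::function<void()> job, int64_t /*pool_priority*/) { job(); }
    };

    // Wrap a read job in a packaged_task so the caller gets a future, and flip
    // the sign of the priority: the request side says "lower value = more urgent",
    // the pool side says "bigger value = more urgent".
    std::future<size_t> submitRead(ToyPool & pool, std::function<size_t()> read_job, int64_t request_priority)
    {
        auto task = std::make_shared<std::packaged_task<size_t()>>(std::move(read_job));
        auto future = task->get_future();
        pool.scheduleOrThrow([task] { (*task)(); }, -request_priority);
        return future;
    }

    int main()
    {
        ToyPool pool;
        auto bytes = submitRead(pool, [] { return size_t{42}; }, /*request_priority=*/1);
        std::cout << "bytes read: " << bytes.get() << '\n';
        return 0;
    }
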
diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h
new file mode 100644
index 00000000000..c300162e214
--- /dev/null
+++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+class ReadBufferFromRemoteFSGather;
+
+class ThreadPoolRemoteFSReader : public IAsynchronousReader
+{
+
+private:
+ ThreadPool pool;
+
+public:
+ ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_);
+
+ std::future submit(Request request) override;
+
+ struct RemoteFSFileDescriptor;
+};
+
+
+struct ThreadPoolRemoteFSReader::RemoteFSFileDescriptor : public IFileDescriptor
+{
+public:
+ RemoteFSFileDescriptor(std::shared_ptr<ReadBufferFromRemoteFSGather> reader_) : reader(reader_) {}
+
+ size_t readInto(char * data, size_t size, size_t offset, size_t ignore = 0);
+
+private:
+ std::shared_ptr<ReadBufferFromRemoteFSGather> reader;
+};
+
+}
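
RemoteFSFileDescriptor exists so the gather buffer can travel through the generic IAsynchronousReader::Request as an opaque file descriptor and be cast back inside the worker (the patch uses assert_cast for this). A toy round trip of that type erasure, using dynamic_cast and placeholder types rather than the real ones:

    #include <cassert>
    #include <cstddef>
    #include <memory>

    // Generic "file descriptor" the asynchronous-reader interface knows about.
    struct IFileDescriptor { virtual ~IFileDescriptor() = default; };

    // Hypothetical remote reader that can fill a caller-provided buffer.
    struct RemoteReader
    {
        size_t readInto(char * data, size_t size, size_t /*offset*/, size_t /*ignore*/)
        {
            for (size_t i = 0; i < size; ++i)
                data[i] = 'r';
            return size;
        }
    };

    // Concrete descriptor: a handle that keeps the reader alive until the job runs.
    struct RemoteFSDescriptor : IFileDescriptor
    {
        explicit RemoteFSDescriptor(std::shared_ptr<RemoteReader> reader_) : reader(std::move(reader_)) {}
        std::shared_ptr<RemoteReader> reader;
    };

    int main()
    {
        // Caller side: hide the concrete reader behind the generic descriptor type.
        std::shared_ptr<IFileDescriptor> fd = std::make_shared<RemoteFSDescriptor>(std::make_shared<RemoteReader>());

        // Worker side: recover the concrete type and delegate the actual read.
        char buf[16];
        auto * remote_fd = dynamic_cast<RemoteFSDescriptor *>(fd.get());
        assert(remote_fd != nullptr);
        assert(remote_fd->reader->readInto(buf, sizeof(buf), /*offset=*/0, /*ignore=*/0) == sizeof(buf));
        return 0;
    }
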
diff --git a/src/Disks/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp
similarity index 100%
rename from src/Disks/WriteIndirectBufferFromRemoteFS.cpp
rename to src/Disks/IO/WriteIndirectBufferFromRemoteFS.cpp
diff --git a/src/Disks/WriteIndirectBufferFromRemoteFS.h b/src/Disks/IO/WriteIndirectBufferFromRemoteFS.h
similarity index 100%
rename from src/Disks/WriteIndirectBufferFromRemoteFS.h
rename to src/Disks/IO/WriteIndirectBufferFromRemoteFS.h
diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp
deleted file mode 100644
index 3bc22167f50..00000000000
--- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-#include "ReadIndirectBufferFromRemoteFS.h"
-
-#include
-#include
-#include
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_SEEK_THROUGH_FILE;
-}
-
-
-template
-ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS(
- RemoteMetadata metadata_)
- : metadata(std::move(metadata_))
-{
-}
-
-
-template
-off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
-{
- if (whence == SEEK_CUR)
- {
- /// If position within current working buffer - shift pos.
- if (!working_buffer.empty() && size_t(getPosition() + offset_) < absolute_position)
- {
- pos += offset_;
- return getPosition();
- }
- else
- {
- absolute_position += offset_;
- }
- }
- else if (whence == SEEK_SET)
- {
- /// If position within current working buffer - shift pos.
- if (!working_buffer.empty() && size_t(offset_) >= absolute_position - working_buffer.size()
- && size_t(offset_) < absolute_position)
- {
- pos = working_buffer.end() - (absolute_position - offset_);
- return getPosition();
- }
- else
- {
- absolute_position = offset_;
- }
- }
- else
- throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
-
- current_buf = initialize();
- pos = working_buffer.end();
-
- return absolute_position;
-}
-
-
-template
-std::unique_ptr ReadIndirectBufferFromRemoteFS::initialize()
-{
- size_t offset = absolute_position;
- for (size_t i = 0; i < metadata.remote_fs_objects.size(); ++i)
- {
- current_buf_idx = i;
- const auto & [file_path, size] = metadata.remote_fs_objects[i];
- if (size > offset)
- {
- auto buf = createReadBuffer(file_path);
- buf->seek(offset, SEEK_SET);
- return buf;
- }
- offset -= size;
- }
- return nullptr;
-}
-
-
-template
-bool ReadIndirectBufferFromRemoteFS::nextImpl()
-{
- /// Find first available buffer that fits to given offset.
- if (!current_buf)
- current_buf = initialize();
-
- /// If current buffer has remaining data - use it.
- if (current_buf)
- {
- bool result = nextAndShiftPosition();
- if (result)
- return true;
- }
-
- /// If there is no available buffers - nothing to read.
- if (current_buf_idx + 1 >= metadata.remote_fs_objects.size())
- return false;
-
- ++current_buf_idx;
- const auto & path = metadata.remote_fs_objects[current_buf_idx].first;
-
- current_buf = createReadBuffer(path);
-
- return nextAndShiftPosition();
-}
-
-template
-bool ReadIndirectBufferFromRemoteFS::nextAndShiftPosition()
-{
- /// Transfer current position and working_buffer to actual ReadBuffer
- swap(*current_buf);
- /// Position and working_buffer will be updated in next() call
- auto result = current_buf->next();
- /// and assigned to current buffer.
- swap(*current_buf);
-
- /// absolute position is shifted by a data size that was read in next() call above.
- if (result)
- absolute_position += working_buffer.size();
-
- return result;
-}
-
-
-#if USE_AWS_S3
-template
-class ReadIndirectBufferFromRemoteFS;
-#endif
-
-#if USE_HDFS
-template
-class ReadIndirectBufferFromRemoteFS;
-#endif
-
-template
-class ReadIndirectBufferFromRemoteFS;
-
-}
diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.h b/src/Disks/ReadIndirectBufferFromRemoteFS.h
deleted file mode 100644
index bf7f95c7987..00000000000
--- a/src/Disks/ReadIndirectBufferFromRemoteFS.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#pragma once
-
-#include
-
-#include
-#include
-#include
-
-
-namespace DB
-{
-
-/// Reads data from S3/HDFS using stored paths in metadata.
-template
-class ReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase
-{
-public:
- explicit ReadIndirectBufferFromRemoteFS(RemoteMetadata metadata_);
-
- off_t seek(off_t offset_, int whence) override;
-
- off_t getPosition() override { return absolute_position - available(); }
-
- String getFileName() const override { return metadata.metadata_file_path; }
-
- virtual std::unique_ptr createReadBuffer(const String & path) = 0;
-
-protected:
- RemoteMetadata metadata;
-
-private:
- std::unique_ptr initialize();
-
- bool nextAndShiftPosition();
-
- bool nextImpl() override;
-
- size_t absolute_position = 0;
-
- size_t current_buf_idx = 0;
-
- std::unique_ptr current_buf;
-};
-
-}
diff --git a/src/Disks/ReadIndirectBufferFromWebServer.cpp b/src/Disks/ReadIndirectBufferFromWebServer.cpp
deleted file mode 100644
index f4c01784542..00000000000
--- a/src/Disks/ReadIndirectBufferFromWebServer.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-#include "ReadIndirectBufferFromWebServer.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
- extern const int CANNOT_SEEK_THROUGH_FILE;
- extern const int SEEK_POSITION_OUT_OF_BOUND;
- extern const int NETWORK_ERROR;
-}
-
-static const auto WAIT_MS = 10;
-
-
-ReadIndirectBufferFromWebServer::ReadIndirectBufferFromWebServer(
- const String & url_, ContextPtr context_, size_t buf_size_, size_t backoff_threshold_, size_t max_tries_)
- : BufferWithOwnMemory(buf_size_)
- , log(&Poco::Logger::get("ReadIndirectBufferFromWebServer"))
- , context(context_)
- , url(url_)
- , buf_size(buf_size_)
- , backoff_threshold_ms(backoff_threshold_)
- , max_tries(max_tries_)
-{
-}
-
-
-std::unique_ptr ReadIndirectBufferFromWebServer::initialize()
-{
- Poco::URI uri(url);
-
- ReadWriteBufferFromHTTP::HTTPHeaderEntries headers;
- headers.emplace_back(std::make_pair("Range", fmt::format("bytes={}-", offset)));
- const auto & settings = context->getSettingsRef();
- LOG_DEBUG(log, "Reading from offset: {}", offset);
- const auto & config = context->getConfigRef();
- Poco::Timespan http_keep_alive_timeout{config.getUInt("keep_alive_timeout", 20), 0};
-
- return std::make_unique(
- uri,
- Poco::Net::HTTPRequest::HTTP_GET,
- ReadWriteBufferFromHTTP::OutStreamCallback(),
- ConnectionTimeouts(std::max(Poco::Timespan(settings.http_connection_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)),
- settings.http_send_timeout,
- std::max(Poco::Timespan(settings.http_receive_timeout.totalSeconds(), 0), Poco::Timespan(20, 0)),
- settings.tcp_keep_alive_timeout,
- http_keep_alive_timeout),
- 0,
- Poco::Net::HTTPBasicCredentials{},
- buf_size,
- headers);
-}
-
-
-bool ReadIndirectBufferFromWebServer::nextImpl()
-{
- bool next_result = false, successful_read = false;
- UInt16 milliseconds_to_wait = WAIT_MS;
-
- if (impl)
- {
- /// Restore correct position at the needed offset.
- impl->position() = position();
- assert(!impl->hasPendingData());
- }
-
- WriteBufferFromOwnString error_msg;
- for (size_t i = 0; (i < max_tries) && !successful_read && !next_result; ++i)
- {
- while (milliseconds_to_wait < backoff_threshold_ms)
- {
- try
- {
- if (!impl)
- {
- impl = initialize();
- next_result = impl->hasPendingData();
- if (next_result)
- break;
- }
-
- next_result = impl->next();
- successful_read = true;
- break;
- }
- catch (const Poco::Exception & e)
- {
- LOG_WARNING(log, "Read attempt failed for url: {}. Error: {}", url, e.what());
- error_msg << fmt::format("Error: {}\n", e.what());
-
- sleepForMilliseconds(milliseconds_to_wait);
- milliseconds_to_wait *= 2;
- impl.reset();
- }
- }
- milliseconds_to_wait = WAIT_MS;
- }
-
- if (!successful_read)
- throw Exception(ErrorCodes::NETWORK_ERROR,
- "All read attempts failed for url: {}. Reason:\n{}", url, error_msg.str());
-
- if (next_result)
- {
- BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
- offset += working_buffer.size();
- }
-
- return next_result;
-}
-
-
-off_t ReadIndirectBufferFromWebServer::seek(off_t offset_, int whence)
-{
- if (impl)
- throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek is allowed only before first read attempt from the buffer");
-
- if (whence != SEEK_SET)
- throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed");
-
- if (offset_ < 0)
- throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", std::to_string(offset_));
-
- offset = offset_;
-
- return offset;
-}
-
-
-off_t ReadIndirectBufferFromWebServer::getPosition()
-{
- return offset - available();
-}
-
-}
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index c8e248538c0..ef8bb8e0feb 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -17,11 +17,7 @@
#include
#include
-#include
-#include
-
#include
-
#include
#include
#include
@@ -29,6 +25,12 @@
#include
#include
+#include
+#include
+#include
+#include
+#include
+
#include
#include
#include
@@ -127,47 +129,19 @@ void throwIfError(const Aws::Utils::Outcome & response)
}
}
-/// Reads data from S3 using stored paths in metadata.
-class ReadIndirectBufferFromS3 final : public ReadIndirectBufferFromRemoteFS
-{
-public:
- ReadIndirectBufferFromS3(
- std::shared_ptr client_ptr_,
- const String & bucket_,
- DiskS3::Metadata metadata_,
- size_t max_single_read_retries_,
- size_t buf_size_)
- : ReadIndirectBufferFromRemoteFS(metadata_)
- , client_ptr(std::move(client_ptr_))
- , bucket(bucket_)
- , max_single_read_retries(max_single_read_retries_)
- , buf_size(buf_size_)
- {
- }
-
- std::unique_ptr createReadBuffer(const String & path) override
- {
- return std::make_unique(client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, buf_size);
- }
-
-private:
- std::shared_ptr client_ptr;
- const String & bucket;
- UInt64 max_single_read_retries;
- size_t buf_size;
-};
-
DiskS3::DiskS3(
String name_,
String bucket_,
String s3_root_path_,
String metadata_path_,
+ ContextPtr context_,
SettingsPtr settings_,
GetDiskSettings settings_getter_)
: IDiskRemote(name_, s3_root_path_, metadata_path_, "DiskS3", settings_->thread_pool_size)
, bucket(std::move(bucket_))
, current_settings(std::move(settings_))
, settings_getter(settings_getter_)
+ , context(context_)
{
}
@@ -230,9 +204,23 @@ std::unique_ptr DiskS3::readFile(const String & path, co
LOG_TRACE(log, "Read from file by path: {}. Existing S3 objects: {}",
backQuote(metadata_path + path), metadata.remote_fs_objects.size());
- auto reader = std::make_unique(
- settings->client, bucket, metadata, settings->s3_max_single_read_retries, read_settings.remote_fs_buffer_size);
- return std::make_unique(std::move(reader), settings->min_bytes_for_seek);
+ bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::read_threadpool;
+
+ auto s3_impl = std::make_unique(
+ path,
+ settings->client, bucket, metadata,
+ settings->s3_max_single_read_retries, read_settings, threadpool_read);
+
+ if (threadpool_read)
+ {
+ auto reader = getThreadPoolReader();
+ return std::make_unique(reader, read_settings, std::move(s3_impl));
+ }
+ else
+ {
+ auto buf = std::make_unique(std::move(s3_impl));
+ return std::make_unique(std::move(buf), settings->min_bytes_for_seek);
+ }
}
std::unique_ptr DiskS3::writeFile(const String & path, size_t buf_size, WriteMode mode)
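
The rewritten readFile() boils down to a two-way dispatch: build the S3 gather buffer, then either hand it to the threadpool-backed asynchronous wrapper or keep the old synchronous path with seek avoidance. A condensed sketch of that dispatch shape — the buffer classes below are simplified placeholders, not the actual ClickHouse types:

    #include <iostream>
    #include <memory>
    #include <string>

    enum class RemoteFSReadMethod { read, read_threadpool };

    // Simplified placeholders for the two kinds of buffers readFile() can return.
    struct IRemoteReadBuffer { virtual ~IRemoteReadBuffer() = default; virtual std::string describe() const = 0; };
    struct SyncRemoteBuffer : IRemoteReadBuffer { std::string describe() const override { return "synchronous gather read"; } };
    struct AsyncRemoteBuffer : IRemoteReadBuffer { std::string describe() const override { return "threadpool-backed asynchronous read"; } };

    // Shape of the dispatch: pick the asynchronous wrapper only when the setting asks for it.
    std::unique_ptr<IRemoteReadBuffer> openRemoteFile(RemoteFSReadMethod method)
    {
        if (method == RemoteFSReadMethod::read_threadpool)
            return std::make_unique<AsyncRemoteBuffer>();
        return std::make_unique<SyncRemoteBuffer>();
    }

    int main()
    {
        std::cout << openRemoteFile(RemoteFSReadMethod::read)->describe() << '\n';
        std::cout << openRemoteFile(RemoteFSReadMethod::read_threadpool)->describe() << '\n';
        return 0;
    }
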
@@ -378,7 +366,7 @@ int DiskS3::readSchemaVersion(const String & source_bucket, const String & sourc
source_bucket,
source_path + SCHEMA_VERSION_OBJECT,
settings->s3_max_single_read_retries,
- DBMS_DEFAULT_BUFFER_SIZE);
+ context->getReadSettings());
readIntText(version, buffer);
@@ -1033,9 +1021,9 @@ void DiskS3::onFreeze(const String & path)
revision_file_buf.finalize();
}
-void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String &, const DisksMap &)
+void DiskS3::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &)
{
- auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context);
+ auto new_settings = settings_getter(config, "storage_configuration.disks." + name, context_);
current_settings.set(std::move(new_settings));
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index b8f83b4763d..19bcb925cb4 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -69,6 +69,7 @@ public:
String bucket_,
String s3_root_path_,
String metadata_path_,
+ ContextPtr context_,
SettingsPtr settings_,
GetDiskSettings settings_getter_);
@@ -175,6 +176,8 @@ private:
static constexpr int RESTORABLE_SCHEMA_VERSION = 1;
/// Directories with data.
const std::vector data_roots {"data", "store"};
+
+ ContextPtr context;
};
}
diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp
index 57c2cf17239..8174ccea330 100644
--- a/src/Disks/S3/registerDiskS3.cpp
+++ b/src/Disks/S3/registerDiskS3.cpp
@@ -184,6 +184,7 @@ void registerDiskS3(DiskFactory & factory)
uri.bucket,
uri.key,
metadata_path,
+ context,
getSettings(config, config_prefix, context),
getSettings);
diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h
index 77b4a2f5b22..e4a81623205 100644
--- a/src/IO/AsynchronousReader.h
+++ b/src/IO/AsynchronousReader.h
@@ -46,6 +46,7 @@ public:
size_t size = 0;
char * buf = nullptr;
int64_t priority = 0;
+ size_t ignore = 0;
};
/// Less than the requested amount of data can be returned.
diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h
index 609065feb7e..be456ea398c 100644
--- a/src/IO/ReadBuffer.h
+++ b/src/IO/ReadBuffer.h
@@ -202,6 +202,12 @@ public:
*/
virtual void prefetch() {}
+ /**
+ * For reading from a remote filesystem, when it matters exactly how much data we are going to read.
+ */
+ virtual void setReadUntilPosition(size_t /* position */) {}
+ virtual void setReadUntilEnd() {}
+
protected:
/// The number of bytes to ignore from the initial position of `working_buffer`
/// buffer. Apparently this is an additional out-parameter for nextImpl(),
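
setReadUntilPosition()/setReadUntilEnd() let a caller announce the exact right bound it will need, so a remote implementation can request precisely that byte range instead of streaming to the end of the object. A toy reader honouring such a bound, with plain standard C++ types standing in for the real buffers:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <string>

    // Toy source: a string pretending to be a remote object.
    class BoundedReader
    {
    public:
        explicit BoundedReader(std::string data_) : data(std::move(data_)), until(data.size()) {}

        // Caller promises it will not need bytes at or after `position`.
        void setReadUntilPosition(size_t position) { until = std::min(position, data.size()); }
        void setReadUntilEnd() { until = data.size(); }

        // Read at most `size` bytes, never crossing the announced bound.
        size_t read(char * out, size_t size)
        {
            size_t n = std::min(size, until - offset);
            std::copy_n(data.data() + offset, n, out);
            offset += n;
            return n;
        }

    private:
        std::string data;
        size_t offset = 0;
        size_t until = 0;
    };

    int main()
    {
        BoundedReader reader("0123456789");
        reader.setReadUntilPosition(4);               // only the first 4 bytes are needed
        char buf[8];
        assert(reader.read(buf, sizeof(buf)) == 4);
        assert(reader.read(buf, sizeof(buf)) == 0);   // bound reached, nothing more to fetch
        return 0;
    }
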
diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index b1c4cdc3bb9..78d296be60e 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -2,15 +2,17 @@
#if USE_AWS_S3
-# include
-# include
-# include
+#include
+#include
+#include
-# include
-# include
-# include
+#include
+#include
-# include
+#include
+#include
+
+#include
namespace ProfileEvents
@@ -27,43 +29,81 @@ namespace ErrorCodes
extern const int S3_ERROR;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
+ extern const int LOGICAL_ERROR;
}
ReadBufferFromS3::ReadBufferFromS3(
- std::shared_ptr client_ptr_, const String & bucket_, const String & key_, UInt64 max_single_read_retries_, size_t buffer_size_)
+ std::shared_ptr client_ptr_, const String & bucket_, const String & key_,
+ UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer_, size_t read_until_position_)
: SeekableReadBuffer(nullptr, 0)
, client_ptr(std::move(client_ptr_))
, bucket(bucket_)
, key(key_)
, max_single_read_retries(max_single_read_retries_)
- , buffer_size(buffer_size_)
+ , read_settings(settings_)
+ , use_external_buffer(use_external_buffer_)
+ , read_until_position(read_until_position_)
{
}
bool ReadBufferFromS3::nextImpl()
{
+ if (read_until_position)
+ {
+ if (read_until_position == offset)
+ return false;
+
+ if (read_until_position < offset)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
+ }
+
bool next_result = false;
if (impl)
{
- /// `impl` has been initialized earlier and now we're at the end of the current portion of data.
- impl->position() = position();
- assert(!impl->hasPendingData());
- }
- else
- {
- /// `impl` is not initialized and we're about to read the first portion of data.
- impl = initialize();
- next_result = impl->hasPendingData();
+ if (use_external_buffer)
+ {
+ /**
+ * use_external_buffer means we read into a buffer that
+ * was passed to us from somewhere else. We do not check whether
+ * the previously returned buffer has been fully read (no hasPendingData() check is needed),
+ * because this branch means we are prefetching data:
+ * each nextImpl() call may fill a different buffer.
+ */
+ impl->set(internal_buffer.begin(), internal_buffer.size());
+ assert(working_buffer.begin() != nullptr);
+ assert(!internal_buffer.empty());
+ }
+ else
+ {
+ /**
+ * impl was initialized earlier; pass position() back to it to make
+ * sure there is no pending data left unread.
+ */
+ impl->position() = position();
+ assert(!impl->hasPendingData());
+ }
}
- auto sleep_time_with_backoff_milliseconds = std::chrono::milliseconds(100);
+ size_t sleep_time_with_backoff_milliseconds = 100;
for (size_t attempt = 0; (attempt < max_single_read_retries) && !next_result; ++attempt)
{
Stopwatch watch;
try
{
+ if (!impl)
+ {
+ impl = initialize();
+
+ if (use_external_buffer)
+ {
+ impl->set(internal_buffer.begin(), internal_buffer.size());
+ assert(working_buffer.begin() != nullptr);
+ assert(!internal_buffer.empty());
+ }
+ }
+
/// Try to read a next portion of data.
next_result = impl->next();
watch.stop();
@@ -83,13 +123,11 @@ bool ReadBufferFromS3::nextImpl()
throw;
/// Pause before next attempt.
- std::this_thread::sleep_for(sleep_time_with_backoff_milliseconds);
+ sleepForMilliseconds(sleep_time_with_backoff_milliseconds);
sleep_time_with_backoff_milliseconds *= 2;
/// Try to reinitialize `impl`.
impl.reset();
- impl = initialize();
- next_result = impl->hasPendingData();
}
}
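
The retry loop in nextImpl() now sleeps between attempts with a doubling delay and resets `impl` so the next attempt reopens the stream, instead of reinitializing inside the catch block. A generic sketch of that retry-with-exponential-backoff shape (no S3 client involved; the rethrow-on-last-attempt policy here is a simplification of the real error handling):

    #include <chrono>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <stdexcept>
    #include <thread>

    // Retry `attempt_read` up to `max_tries` times, doubling the pause after each
    // failure, and rethrow the last error if every attempt fails.
    bool readWithRetries(const std::function<bool()> & attempt_read, size_t max_tries)
    {
        size_t sleep_ms = 100;                        // mirrors the initial backoff above
        for (size_t attempt = 0; attempt < max_tries; ++attempt)
        {
            try
            {
                return attempt_read();                // success: stop retrying
            }
            catch (const std::exception & e)
            {
                std::cerr << "attempt " << attempt + 1 << " failed: " << e.what() << '\n';
                if (attempt + 1 == max_tries)
                    throw;                            // out of attempts
                std::this_thread::sleep_for(std::chrono::milliseconds(sleep_ms));
                sleep_ms *= 2;                        // exponential backoff
            }
        }
        return false;
    }

    int main()
    {
        size_t calls = 0;
        bool ok = readWithRetries([&]
        {
            if (++calls < 3)
                throw std::runtime_error("transient network error");
            return true;                              // third attempt succeeds
        }, /*max_tries=*/4);
        std::cout << (ok ? "read succeeded" : "no data") << " after " << calls << " attempts\n";
        return 0;
    }
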
@@ -127,19 +165,34 @@ off_t ReadBufferFromS3::getPosition()
std::unique_ptr ReadBufferFromS3::initialize()
{
- LOG_TRACE(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset);
-
Aws::S3::Model::GetObjectRequest req;
req.SetBucket(bucket);
req.SetKey(key);
- req.SetRange(fmt::format("bytes={}-", offset));
+
+ /**
+ * If remote_filesystem_read_method = 'read_threadpool', then for MergeTree family tables
+ * exact byte ranges to read are always passed here.
+ */
+ if (read_until_position)
+ {
+ if (offset >= read_until_position)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
+
+ req.SetRange(fmt::format("bytes={}-{}", offset, read_until_position - 1));
+ LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Range: {}-{}", bucket, key, offset, read_until_position - 1);
+ }
+ else
+ {
+ req.SetRange(fmt::format("bytes={}-", offset));
+ LOG_DEBUG(log, "Read S3 object. Bucket: {}, Key: {}, Offset: {}", bucket, key, offset);
+ }
Aws::S3::Model::GetObjectOutcome outcome = client_ptr->GetObject(req);
if (outcome.IsSuccess())
{
read_result = outcome.GetResultWithOwnership();
- return std::make_unique(read_result.GetBody(), buffer_size);
+ return std::make_unique(read_result.GetBody(), read_settings.remote_fs_buffer_size);
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
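
initialize() now derives the Range header from both the current offset and the optional exclusive right bound set via read_until_position. A small helper showing the same computation and bounds check; `makeRangeHeader` is an illustrative function, not part of the patch:

    #include <cassert>
    #include <cstddef>
    #include <sstream>
    #include <stdexcept>
    #include <string>

    // Build the value of an HTTP Range header: a closed byte interval.
    // `read_until_position` is exclusive (0 means "until the end of the object"),
    // matching how the patch derives "bytes=offset-(read_until_position - 1)".
    std::string makeRangeHeader(size_t offset, size_t read_until_position)
    {
        std::ostringstream range;
        if (read_until_position)
        {
            if (offset >= read_until_position)
                throw std::logic_error("attempt to read beyond the requested right bound");
            range << "bytes=" << offset << "-" << read_until_position - 1;
        }
        else
        {
            range << "bytes=" << offset << "-";       // open-ended range
        }
        return range.str();
    }

    int main()
    {
        assert(makeRangeHeader(0, 1024) == "bytes=0-1023");
        assert(makeRangeHeader(512, 0) == "bytes=512-");
        return 0;
    }
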
diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h
index e557e3bc324..d0df5c59eb3 100644
--- a/src/IO/ReadBufferFromS3.h
+++ b/src/IO/ReadBufferFromS3.h
@@ -4,12 +4,14 @@
#if USE_AWS_S3
-# include
+#include
-# include
-# include
-# include
-# include "SeekableReadBuffer.h"
+#include
+#include
+#include
+#include
+
+#include
namespace Aws::S3
{
@@ -28,7 +30,6 @@ private:
String bucket;
String key;
UInt64 max_single_read_retries;
- size_t buffer_size;
off_t offset = 0;
Aws::S3::Model::GetObjectResult read_result;
std::unique_ptr impl;
@@ -41,7 +42,9 @@ public:
const String & bucket_,
const String & key_,
UInt64 max_single_read_retries_,
- size_t buffer_size_);
+ const ReadSettings & settings_,
+ bool use_external_buffer = false,
+ size_t read_until_position_ = 0);
bool nextImpl() override;
@@ -50,6 +53,10 @@ public:
private:
std::unique_ptr initialize();
+
+ ReadSettings read_settings;
+ bool use_external_buffer;
+ off_t read_until_position = 0;
};
}
diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h
index 379b7bc2216..be328e28118 100644
--- a/src/IO/ReadSettings.h
+++ b/src/IO/ReadSettings.h
@@ -6,7 +6,7 @@
namespace DB
{
-enum class ReadMethod
+enum class LocalFSReadMethod
{
/**
* Simple synchronous reads with 'read'.
@@ -43,12 +43,20 @@ enum class ReadMethod
pread_fake_async
};
+enum class RemoteFSReadMethod
+{
+ read,
+ read_threadpool,
+};
+
class MMappedFileCache;
struct ReadSettings
{
/// Method to use reading from local filesystem.
- ReadMethod local_fs_method = ReadMethod::pread;
+ LocalFSReadMethod local_fs_method = LocalFSReadMethod::pread;
+ /// Method to use reading from remote filesystem.
+ RemoteFSReadMethod remote_fs_method = RemoteFSReadMethod::read;
size_t local_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
size_t remote_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE;
@@ -66,8 +74,14 @@ struct ReadSettings
/// For 'pread_threadpool' method. A lower value means a higher priority.
size_t priority = 0;
- size_t remote_fs_backoff_threshold = 10000;
- size_t remote_fs_backoff_max_tries = 4;
+ size_t remote_fs_read_max_backoff_ms = 10000;
+ size_t remote_fs_read_backoff_max_tries = 4;
+
+ /// Set to true for MergeTree tables to make sure
+ /// that the last position (offset in the compressed file) is always passed,
+ /// otherwise asynchronous reading from a remote fs is not efficient.
+ /// If reading is done without the final position set, a LOGICAL_ERROR is thrown.
+ bool must_read_until_position = false;
ReadSettings adjustBufferSize(size_t file_size) const
{
diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index 55fcdca89b3..5254b140679 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -2,10 +2,12 @@
#include