diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4b201802cae..db170c3e28f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,7 +2,7 @@ A technical comment, you are free to remove or leave it as it is when PR is created The following categories are used in the next scripts, update them accordingly utils/changelog/changelog.py -tests/ci/run_check.py +tests/ci/cancel_and_rerun_workflow_lambda/app.py --> ### Changelog category (leave one): - New Feature diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 782cfad43f8..506ed451b6d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -1308,6 +1308,40 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -4755,6 +4789,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated2 - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestReleaseAnalyzer - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - FunctionalStatelessTestAsan1 diff --git a/.gitmodules b/.gitmodules index ca55281e643..ed61ddb96ba 100644 --- a/.gitmodules +++ b/.gitmodules @@ -253,9 +253,6 @@ [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl -[submodule "contrib/idxd-config"] - path = contrib/idxd-config - url = https://github.com/intel/idxd-config [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash @@ -296,6 +293,9 @@ [submodule "contrib/libdivide"] path = contrib/libdivide url = https://github.com/ridiculousfish/libdivide +[submodule "contrib/libbcrypt"] + path = contrib/libbcrypt + url = https://github.com/rg3/libbcrypt.git [submodule "contrib/ulid-c"] path = contrib/ulid-c url = https://github.com/ClickHouse/ulid-c.git @@ -335,3 +335,6 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/isa-l"] + path = contrib/isa-l + url = https://github.com/ClickHouse/isa-l.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 47320208f02..1ccd4f9846d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.4, 2023-04-26](#234)**
**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**<br/>
**[ClickHouse release v23.2, 2023-02-23](#232)**<br/>
**[ClickHouse release v23.1, 2023-01-25](#231)**<br/>
@@ -6,6 +7,153 @@ # 2023 Changelog +### ClickHouse release 23.4, 2023-04-26 + +#### Backward Incompatible Change +* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). + +#### New Feature +* Support new aggregate function `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in spark. Greenwald-Khanna algorithm refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add a statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. Value is stored in zookeeper and can be used instead of not persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support `Map` type for JSONExtract. 
[#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). +* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4 space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). +* Add `extractKeyValuePairs` function to extract key value pairs from strings. Input strings might contain noise (i.e. log files / do not need to be 100% formatted in key-value-pair format), the algorithm will look for key value pairs matching the arguments passed to the function. As of now, function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). +* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). +* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). + +#### Performance Improvement +* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). +* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts than for each part a set for subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts then this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently it can look up the set in the cache, wait for it to be built and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). +* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). +* Now an internal query to local replica is sent explicitly and data from it received through loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating of the reading process and merging results, continuously answering for requests while all the secondary queries read the data. Note: Using loopback interface is not so performant, otherwise some replicas could starve for tasks which could lead to even slower query execution and not utilizing all possible resources. 
The initialization of the coordinator is now even more lazy. All incoming requests contain the information about the reading algorithm we initialize the coordinator with it when first request comes. If any replica decides to read with a different algorithm–an exception will be thrown and a query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not build set for the right side of `IN` clause with subquery when it is used only for analysis of skip indexes, and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). +* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). Query processing is parallelized right after reading from any data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). This is controlled by the setting `parallelize_output_from_storages` which is not enabled by default. +* Lowered contention of ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). +* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). + +#### Experimental Feature +* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). +* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). +* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implement zero-copy-replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Improvement +* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229) . Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). +* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data. 
- Support `Iceberg` format version v2 (previously only v1 was supported) - Support reading partitioned data for `DeltaLake`/`Hudi` - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small size tables - Made these engines to pickup updates of changed data (previously the state was set on table creation) - Make proper testing for `Iceberg`/`DeltaLake`/`Hudi` using spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)). +* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766) , [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). +* It is possible to set _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450) . [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)). +* `bitCount` function support `FixedString` and `String` data type. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)). +* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)). +* Currently, the JSON_VALUE function is similar as spark's get_json_object function, which support to get value from JSON string by a path like '$.key'. But still has something different - 1. in spark's get_json_object will return null while the path is not exist, but in JSON_VALUE will return empty string; - 2. in spark's get_json_object will return a complex type value, such as a JSON object/array value, but in JSON_VALUE will return empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)). +* For `use_structure_from_insertion_table_in_table_functions` more flexible insert table structure propagation to table function. Fixed an issue with name mapping and using virtual columns. No more need for 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Do not continue retrying to connect to Keeper if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)). 
+* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)). +* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32, and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)). +* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)). +* Added support for `BACKUP ALL` command which backups all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)). +* The output of some SHOW PROCESSLIST is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)). +* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`, settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)). +* Support more types in `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)). +* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)). +* Check primary key type for simple dictionary is native unsigned integer type Add setting `check_dictionary_primary_key ` for compatibility(set `check_dictionary_primary_key =false` to disable checking). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)). +* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow to write/read unnamed tuple as nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. 
[#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)). +* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)). +* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds, the previous behavior (single zero) can be restored using setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)). +* Generate valid Decimals and Bools in generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow trailing commas in expression list of SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)). +* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add support for `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed in runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). +* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). +* Make Schema inference works for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). +* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). 
[#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* If a user set `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add aliases `name` and `part_name` form `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)). +* Functions "arrayDifferenceSupport()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)). +* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)). +* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. The LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from the file inside `/proc`. This error was correctly logged into ClickHouse's server log. We have additionally workaround this issue by reopening a file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)). +* Improve memory accounting for prefetches. Randomise prefetch settings In CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). +* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Improve the embedded dashboard. 
Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or macOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The exception message about the unparsed query parameter will also tell about the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor hiliting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). 
+* Fix a bug in LLVM's libc++ leading to a crash for uploading parts to S3 which size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `toTimeZone` function throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
+* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). +* The `groupArray` aggregate function correctly works for empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Async inserts with empty data will no longer throw exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). +* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). +* Fix unexpected part name error when trying to drop a ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading `Date32` Parquet/Arrow column into not a `Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `UNKNOWN_IDENTIFIER` error while selecting from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). + ### ClickHouse release 23.3 LTS, 2023-03-30 #### Upgrade Notes diff --git a/CMakeLists.txt b/CMakeLists.txt index cc1a64a9e96..263b202049b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -170,12 +170,6 @@ else () set(NO_WHOLE_ARCHIVE --no-whole-archive) endif () -option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON) -if (OS_DARWIN) - # Disable the curl, azure, senry build on MacOS - set (ENABLE_CURL_BUILD OFF) -endif () - if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") # Can be lld or ld-lld or lld-13 or /path/to/lld. if (LINKER_NAME MATCHES "lld") @@ -393,9 +387,9 @@ else() endif () option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON) -# We use mmap for allocations more heavily in debug builds, -# but GWP-ASan also wants to use mmap frequently, -# and due to a large number of memory mappings, +# We use mmap for allocations more heavily in debug builds, +# but GWP-ASan also wants to use mmap frequently, +# and due to a large number of memory mappings, # it does not work together well. 
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) set(ENABLE_GWP_ASAN OFF) @@ -421,8 +415,11 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -set (CMAKE_POSITION_INDEPENDENT_CODE OFF) -if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X)) +if (NOT SANITIZE) + set (CMAKE_POSITION_INDEPENDENT_CODE OFF) +endif() + +if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE) # Slightly more efficient code can be generated # It's disabled for ARM because otherwise ClickHouse cannot run on Android. set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") diff --git a/SECURITY.md b/SECURITY.md index 566a1820834..75c1a9d7d6a 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,21 +13,16 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.4 | ✔️ | | 23.3 | ✔️ | | 23.2 | ✔️ | -| 23.1 | ✔️ | +| 23.1 | ❌ | | 22.12 | ❌ | | 22.11 | ❌ | | 22.10 | ❌ | | 22.9 | ❌ | | 22.8 | ✔️ | -| 22.7 | ❌ | -| 22.6 | ❌ | -| 22.5 | ❌ | -| 22.4 | ❌ | -| 22.3 | ❌ | -| 22.2 | ❌ | -| 22.1 | ❌ | +| 22.* | ❌ | | 21.* | ❌ | | 20.* | ❌ | | 19.* | ❌ | diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index 7e8ea5051d7..49bb81a58be 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -235,6 +235,17 @@ ssize_t getrandom(void *buf, size_t buflen, unsigned flags) return syscall(SYS_getrandom, buf, buflen, flags); } +/* Structure for scatter/gather I/O. */ +struct iovec +{ + void *iov_base; /* Pointer to data. */ + size_t iov_len; /* Length of data. */ +}; + +ssize_t preadv(int __fd, const struct iovec *__iovec, int __count, __off_t __offset) +{ + return syscall(SYS_preadv, __fd, __iovec, __count, (long)(__offset), (long)(__offset>>32)); +} #include #include diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c index 6112f9a339c..78796ca0c05 100644 --- a/base/harmful/harmful.c +++ b/base/harmful/harmful.c @@ -31,7 +31,8 @@ TRAP(argp_state_help) TRAP(argp_usage) TRAP(asctime) TRAP(clearenv) -TRAP(crypt) +// Redefined at contrib/libbcrypt/crypt_blowfish/wrapper.c:186 +// TRAP(crypt) TRAP(ctime) TRAP(cuserid) TRAP(drand48) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 9bb148c12a9..462529fbc13 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54473) +SET(VERSION_REVISION 54474) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 4) +SET(VERSION_MINOR 5) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 46e85357ce2da2a99f56ee83a079e892d7ec3726) -SET(VERSION_DESCRIBE v23.4.1.1-testing) -SET(VERSION_STRING 23.4.1.1) +SET(VERSION_GITHASH 3920eb987f7ed837ada5de8907284adf123f0583) +SET(VERSION_DESCRIBE v23.5.1.1-testing) +SET(VERSION_STRING 23.5.1.1) # end of autochange diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake index 578a9757270..52f301ab8ad 100644 --- a/cmake/fuzzer.cmake +++ b/cmake/fuzzer.cmake @@ -7,10 +7,6 @@ if (FUZZER) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer-no-link") - endif() - # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable if (NOT LIB_FUZZING_ENGINE) set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer") diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index 13aaa414b93..bf5eddf09f5 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -16,50 +16,24 @@ if (SANITIZE) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan") - endif () - elseif (SANITIZE STREQUAL "memory") # MemorySanitizer flags are set according to the official documentation: # https://clang.llvm.org/docs/MemorySanitizer.html#usage - # - # For now, it compiles with `cmake -DSANITIZE=memory -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_FLAGS_ADD="-O1" -DCMAKE_C_FLAGS_ADD="-O1"` - # Compiling with -DCMAKE_BUILD_TYPE=Debug leads to ld.lld failures because - # of large files (was not tested with ld.gold). This is why we compile with - # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to - # keep the binary size down. - # TODO: try compiling with -Og and with ld.gold. - set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") + # Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries. + # Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay. 
+ set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan") - endif () - elseif (SANITIZE STREQUAL "thread") set (TSAN_FLAGS "-fsanitize=thread") if (COMPILER_CLANG) set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt") endif() - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan") - endif () elseif (SANITIZE STREQUAL "undefined") set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero") @@ -78,12 +52,6 @@ if (SANITIZE) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") - endif() - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan") - endif () # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. 
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index be3563d2c61..0c92ff17f11 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -141,20 +141,19 @@ add_contrib (libuv-cmake libuv) add_contrib (liburing-cmake liburing) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv - -if (ENABLE_CURL_BUILD) +if (NOT OS_DARWIN) add_contrib (curl-cmake curl) add_contrib (azure-cmake azure) add_contrib (sentry-native-cmake sentry-native) # requires: curl endif() - add_contrib (fmtlib-cmake fmtlib) add_contrib (krb5-cmake krb5) add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5 add_contrib (libgsasl-cmake libgsasl) # requires krb5 add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl add_contrib (nats-io-cmake nats-io) -add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5 +add_contrib (isa-l-cmake isa-l) +add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 add_contrib (cppkafka-cmake cppkafka) add_contrib (libpqxx-cmake libpqxx) @@ -178,17 +177,15 @@ add_contrib (s2geometry-cmake s2geometry) add_contrib (c-ares-cmake c-ares) add_contrib (qpl-cmake qpl) add_contrib (morton-nd-cmake morton-nd) - if (ARCH_S390X) add_contrib(crc32-s390x-cmake crc32-s390x) endif() - add_contrib (annoy-cmake annoy) - add_contrib (xxHash-cmake xxHash) -add_contrib (google-benchmark-cmake google-benchmark) +add_contrib (libbcrypt-cmake libbcrypt) +add_contrib (google-benchmark-cmake google-benchmark) add_contrib (ulid-c-cmake ulid-c) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. 
diff --git a/contrib/curl b/contrib/curl index c12fb3ddaf4..b0edf0b7dae 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit c12fb3ddaf48e709a7a4deaa55ec485e4df163ee +Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 8a570bd267c..70d9c2816dc 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -12,6 +12,9 @@ set (SRCS "${LIBRARY_DIR}/lib/noproxy.c" "${LIBRARY_DIR}/lib/idn.c" "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cf-haproxy.c" + "${LIBRARY_DIR}/lib/cf-https-connect.c" "${LIBRARY_DIR}/lib/file.c" "${LIBRARY_DIR}/lib/timeval.c" "${LIBRARY_DIR}/lib/base64.c" @@ -37,8 +40,8 @@ set (SRCS "${LIBRARY_DIR}/lib/strcase.c" "${LIBRARY_DIR}/lib/easy.c" "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_log.c" "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/wildcard.c" "${LIBRARY_DIR}/lib/krb5.c" "${LIBRARY_DIR}/lib/memdebug.c" "${LIBRARY_DIR}/lib/http_chunks.c" @@ -96,6 +99,7 @@ set (SRCS "${LIBRARY_DIR}/lib/rand.c" "${LIBRARY_DIR}/lib/curl_multibyte.c" "${LIBRARY_DIR}/lib/conncache.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/http2.c" "${LIBRARY_DIR}/lib/smb.c" "${LIBRARY_DIR}/lib/curl_endian.c" @@ -113,12 +117,13 @@ set (SRCS "${LIBRARY_DIR}/lib/altsvc.c" "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/bufq.c" "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" "${LIBRARY_DIR}/lib/hsts.c" "${LIBRARY_DIR}/lib/http_aws_sigv4.c" "${LIBRARY_DIR}/lib/mqtt.c" "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/h2h3.c" "${LIBRARY_DIR}/lib/headers.c" "${LIBRARY_DIR}/lib/timediff.c" "${LIBRARY_DIR}/lib/vauth/vauth.c" @@ -133,6 +138,7 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vquic/vquic.c" "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" "${LIBRARY_DIR}/lib/vtls/vtls.c" @@ -147,9 +153,6 @@ set (SRCS "${LIBRARY_DIR}/lib/vtls/keylog.c" "${LIBRARY_DIR}/lib/vtls/x509asn1.c" "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vquic/ngtcp2.c" - "${LIBRARY_DIR}/lib/vquic/quiche.c" - "${LIBRARY_DIR}/lib/vquic/msh3.c" "${LIBRARY_DIR}/lib/vssh/libssh2.c" "${LIBRARY_DIR}/lib/vssh/libssh.c" ) diff --git a/contrib/idxd-config b/contrib/idxd-config deleted file mode 160000 index f6605c41a73..00000000000 --- a/contrib/idxd-config +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99 diff --git a/contrib/isa-l b/contrib/isa-l new file mode 160000 index 00000000000..9f2b68f0575 --- /dev/null +++ b/contrib/isa-l @@ -0,0 +1 @@ +Subproject commit 9f2b68f05752097f0f16632fc4a9a86950831efd diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d4d6d648268 --- /dev/null +++ b/contrib/isa-l-cmake/CMakeLists.txt @@ -0,0 +1,203 @@ +option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES}) +if (ARCH_AARCH64) + # Disable ISA-L libray on aarch64. + set (ENABLE_ISAL_LIBRARY OFF) +endif () + +if (NOT ENABLE_ISAL_LIBRARY) + message(STATUS "Not using isa-l") + return() +endif() + +set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l") + +# The YASM and NASM assembers are somewhat mutually compatible. ISAL specifically needs NASM. 
If only YASM is installed, then check_language(ASM_NASM) +# below happily finds YASM, leading to weird errors at build time. Therefore, do an explicit check for NASM here. +find_program(NASM_PATH NAMES nasm) +if (NOT NASM_PATH) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!") +endif () + +include(CheckLanguage) +check_language(ASM_NASM) +if(NOT CMAKE_ASM_NASM_COMPILER) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!") +endif() + +enable_language(ASM_NASM) + +set(ISAL_C_SRC + ${ISAL_SOURCE_DIR}/crc/crc_base_aliases.c + ${ISAL_SOURCE_DIR}/crc/crc_base.c + ${ISAL_SOURCE_DIR}/crc/crc64_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c + ${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c + ${ISAL_SOURCE_DIR}/igzip/adler32_base.c + ${ISAL_SOURCE_DIR}/igzip/encode_df.c + ${ISAL_SOURCE_DIR}/igzip/flatten_ll.c + ${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c + ${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c + ${ISAL_SOURCE_DIR}/igzip/huff_codes.c + ${ISAL_SOURCE_DIR}/igzip/hufftables_c.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body.c + ${ISAL_SOURCE_DIR}/igzip/igzip_inflate.c + ${ISAL_SOURCE_DIR}/igzip/igzip.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c + ${ISAL_SOURCE_DIR}/programs/igzip_cli.c + ${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c + ${ISAL_SOURCE_DIR}/raid/raid_base.c +) + +set(ISAL_ASM_SRC + ${ISAL_SOURCE_DIR}/crc/crc_multibinary.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_01.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_00.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_sse.asm + 
${ISAL_SOURCE_DIR}/erasure_code/ec_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_sse.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_avx2_4.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_sse.asm + ${ISAL_SOURCE_DIR}/igzip/bitbuf2.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_04.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_06.asm + ${ISAL_SOURCE_DIR}/igzip/heap_macros.asm + ${ISAL_SOURCE_DIR}/igzip/huffman.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_body.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_compare_types.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_deflate_hash.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_finish.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body_h1_gr_bt.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_finish.asm + 
${ISAL_SOURCE_DIR}/igzip/igzip_inflate_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_04.asm + ${ISAL_SOURCE_DIR}/igzip/lz0a_const.asm + ${ISAL_SOURCE_DIR}/igzip/options.asm + ${ISAL_SOURCE_DIR}/igzip/proc_heap.asm + ${ISAL_SOURCE_DIR}/igzip/rfc1951_lookup.asm + ${ISAL_SOURCE_DIR}/igzip/stdmac.asm + ${ISAL_SOURCE_DIR}/mem/mem_multibinary.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx2.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx512.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx2.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_sse.asm + ${ISAL_SOURCE_DIR}/raid/raid_multibinary.asm + ${ISAL_SOURCE_DIR}/raid/xor_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_sse.asm +) + +# Adding ISA-L library target +add_library(_isal ${ISAL_C_SRC} ${ISAL_ASM_SRC}) + +# Setting external and internal interfaces for ISA-L library +target_include_directories(_isal + PUBLIC ${ISAL_SOURCE_DIR}/include + PUBLIC ${ISAL_SOURCE_DIR}/igzip + PUBLIC ${ISAL_SOURCE_DIR}/crc + PUBLIC ${ISAL_SOURCE_DIR}/erasure_code) + +# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. +# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" +if (SANITIZE STREQUAL "undefined") + get_target_property(target_options _isal COMPILE_OPTIONS) + list(REMOVE_ITEM target_options "-fno-sanitize=undefined") + set_property(TARGET _isal PROPERTY COMPILE_OPTIONS ${target_options}) +endif() + +add_library(ch_contrib::isal ALIAS _isal) diff --git a/contrib/libbcrypt b/contrib/libbcrypt new file mode 160000 index 00000000000..8aa32ad94eb --- /dev/null +++ b/contrib/libbcrypt @@ -0,0 +1 @@ +Subproject commit 8aa32ad94ebe06b76853b0767c910c9fbf7ccef4 diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt new file mode 100644 index 00000000000..d40d7f9195e --- /dev/null +++ b/contrib/libbcrypt-cmake/CMakeLists.txt @@ -0,0 +1,19 @@ +option(ENABLE_BCRYPT "Enable bcrypt" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BCRYPT) + message(STATUS "Not using bcrypt") + return() +endif() + +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt") + +set(SRCS + "${LIBRARY_DIR}/bcrypt.c" + "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c" + "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c" + "${LIBRARY_DIR}/crypt_blowfish/wrapper.c" +) + +add_library(_bcrypt ${SRCS}) +target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}") +add_library(ch_contrib::bcrypt ALIAS _bcrypt) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 3c91d96ff29..164b89253fa 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 3c91d96ff29fe5928f055519c6d979c4b104db9e +Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index c22cac731fe..fd9ed7dc182 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -70,6 +70,30 @@ set(SRCS "${HDFS3_SOURCE_DIR}/client/Token.cpp" 
"${HDFS3_SOURCE_DIR}/client/PacketPool.cpp" "${HDFS3_SOURCE_DIR}/client/OutputStream.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferDecodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferEncodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/CoderUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/ECChunk.cpp" + "${HDFS3_SOURCE_DIR}/client/ErasureCoderOptions.cpp" + "${HDFS3_SOURCE_DIR}/client/GF256.cpp" + "${HDFS3_SOURCE_DIR}/client/GaloisField.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/Preconditions.cpp" + "${HDFS3_SOURCE_DIR}/client/RSUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureCoderFactory.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/StatefulStripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedBlockUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedInputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedOutputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/SystemECPolicies.cpp" + "${HDFS3_SOURCE_DIR}/client/dump.cpp" + "${HDFS3_SOURCE_DIR}/client/erasure_coder.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp" @@ -148,6 +172,11 @@ if (TARGET OpenSSL::SSL) target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() +if (TARGET ch_contrib::isal) + target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal) + add_definitions(-DHADOOP_ISAL_LIBRARY) +endif() + add_library(ch_contrib::hdfs ALIAS _hdfs3) if (ENABLE_CLICKHOUSE_BENCHMARK) diff --git a/contrib/qpl b/contrib/qpl index d75a29d95d8..0bce2b03423 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit d75a29d95d8a548297fce3549d21020005364dc8 +Subproject commit 0bce2b03423f6fbeb8bce66cc8be0bf558058848 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index fc5548b0652..334731d105f 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -40,9 +40,10 @@ set (LOG_HW_INIT OFF) set (SANITIZE_MEMORY OFF) set (SANITIZE_THREADS OFF) set (LIB_FUZZING_ENGINE OFF) +set (DYNAMIC_LOADING_LIBACCEL_CONFIG OFF) function(GetLibraryVersion _content _outputVar) - string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") + string(REGEX MATCHALL "QPL VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE) endfunction() @@ -240,7 +241,9 @@ add_library(core_iaa OBJECT ${HW_PATH_SRC}) target_include_directories(core_iaa PRIVATE ${UUID_DIR} PUBLIC $ - PRIVATE $ + PUBLIC $ + PRIVATE $ # status.h in own_checkers.h + PRIVATE $ # own_checkers.h PRIVATE $) target_compile_options(core_iaa @@ -339,4 +342,7 @@ target_link_libraries(_qpl PRIVATE ${CMAKE_DL_LIBS}) add_library (ch_contrib::qpl ALIAS _qpl) -target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include") +target_include_directories(_qpl SYSTEM BEFORE + PUBLIC "${QPL_PROJECT_DIR}/include" + PUBLIC "${LIBACCEL_SOURCE_DIR}/accfg" + PUBLIC ${UUID_DIR}) diff --git a/contrib/sysroot b/contrib/sysroot index f0081b2649b..e0d1b64da66 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8 +Subproject commit 
e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb diff --git a/docker/images.json b/docker/images.json index 9150abe1f1c..b4f3e755bd1 100644 --- a/docker/images.json +++ b/docker/images.json @@ -123,7 +123,8 @@ "docker/test/stateless", "docker/test/integration/base", "docker/test/fuzzer", - "docker/test/keeper-jepsen" + "docker/test/keeper-jepsen", + "docker/test/server-jepsen" ] }, "docker/test/integration/kerberized_hadoop": { @@ -139,6 +140,10 @@ "name": "clickhouse/keeper-jepsen-test", "dependent": [] }, + "docker/test/server-jepsen": { + "name": "clickhouse/server-jepsen-test", + "dependent": [] + }, "docker/test/install/deb": { "name": "clickhouse/install-deb-test", "dependent": [] diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 35b94d2563e..73da4515ff4 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.3.2.37" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index a20feaf654f..1a5d2071f6b 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.3.2.37" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 4851e2b1fc7..8792d419a16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.3.2.37" +ARG VERSION="23.4.2.11" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/server-jepsen/Dockerfile b/docker/test/server-jepsen/Dockerfile index 958dbfa066a..a212427b2a1 100644 --- a/docker/test/server-jepsen/Dockerfile +++ b/docker/test/server-jepsen/Dockerfile @@ -16,6 +16,11 @@ ENV TESTS_TO_RUN="8" ENV TIME_LIMIT="30" ENV KEEPER_NODE="" +ENV NEMESIS="" +ENV WORKLOAD="" +ENV WITH_LOCAL_BINARY="" +ENV RATE="" +ENV CONCURRENCY="" # volumes diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 4a966d50f74..c11a48f6d4c 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -15,8 +15,38 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then ls -lath ||: fi +clickhouse_source="--clickhouse-source $CLICKHOUSE_PACKAGE" +if [ -n "$WITH_LOCAL_BINARY" ]; then + clickhouse_source="--clickhouse-source /clickhouse" +fi + +tests_count="--test-count $TESTS_TO_RUN" +tests_to_run="test-all" +workload="" +if [ -n "$WORKLOAD" ]; then + tests_to_run="test" + workload="--workload $WORKLOAD" + tests_count="" +fi + +nemesis="" +if [ -n "$NEMESIS" ]; then + nemesis="--nemesis $NEMESIS" +fi + +rate="" +if [ -n "$RATE" ]; then + rate="--rate $RATE" +fi + +concurrency="" +if [ -n "$CONCURRENCY" ]; then + concurrency="--concurrency 
$CONCURRENCY" +fi + + cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse" -(lein run server test-all --keeper "$KEEPER_NODE" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --clickhouse-source "$CLICKHOUSE_PACKAGE" --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" +(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log" mv store "$TEST_OUTPUT/" diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index fce90ca2537..10ba597a33a 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -59,6 +59,12 @@ install_packages previous_release_package_folder # available for dump via clickhouse-local configure +# local_blob_storage disk type does not exist in older versions +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local_blob_storage|local|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log @@ -83,6 +89,11 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1 export ZOOKEEPER_FAULT_INJECTION=0 configure +sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ + | sed "s|local_blob_storage|local|" \ + > /etc/clickhouse-server/config.d/storage_conf.xml.tmp +sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml + start clickhouse-client --query="SELECT 'Server version: ', version()" diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index da58db8e45d..470eb61b3fa 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -18,7 +18,7 @@ SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" -def process_test_log(log_path): +def process_test_log(log_path, broken_tests): total = 0 skipped = 0 unknown = 0 @@ -62,8 +62,12 @@ def process_test_log(log_path): failed += 1 test_results.append((test_name, "Timeout", test_time, [])) elif FAIL_SIGN in line: - failed += 1 - test_results.append((test_name, "FAIL", test_time, [])) + if test_name in broken_tests: + success += 1 + test_results.append((test_name, "OK", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) elif UNKNOWN_SIGN in line: unknown += 1 test_results.append((test_name, "FAIL", test_time, [])) @@ -71,8 +75,19 @@ def process_test_log(log_path): skipped += 1 test_results.append((test_name, "SKIPPED", test_time, [])) else: - success += int(OK_SIGN in line) - test_results.append((test_name, "OK", test_time, [])) + if OK_SIGN in line and test_name in broken_tests: + failed += 1 + test_results.append( + ( + test_name, + "SKIPPED", + test_time, + ["This test passed. 
Update broken_tests.txt.\n"], + ) + ) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) test_end = False elif ( len(test_results) > 0 and test_results[-1][1] == "FAIL" and not test_end @@ -110,7 +125,7 @@ def process_test_log(log_path): ) -def process_result(result_path): +def process_result(result_path, broken_tests): test_results = [] state = "success" description = "" @@ -134,7 +149,7 @@ def process_result(result_path): success_finish, retries, test_results, - ) = process_test_log(result_path) + ) = process_test_log(result_path, broken_tests) is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flaky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately) @@ -186,9 +201,17 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + parser.add_argument("--broken-tests", default="/broken_tests.txt") args = parser.parse_args() - state, description, test_results = process_result(args.in_results_dir) + broken_tests = list() + if os.path.exists(args.broken_tests): + logging.info(f"File {args.broken_tests} with broken tests found") + with open(args.broken_tests) as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + + state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) diff --git a/docs/changelogs/v23.4.1.1943-stable.md b/docs/changelogs/v23.4.1.1943-stable.md new file mode 100644 index 00000000000..ea16f5856be --- /dev/null +++ b/docs/changelogs/v23.4.1.1943-stable.md @@ -0,0 +1,375 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.1.1943-stable (3920eb987f7) FIXME as compared to v23.3.1.2823-lts (46e85357ce2) + +#### Backward Incompatible Change +* If `path` in cache configuration is not empty and is not absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Compatibility setting `parallelize_output_from_storages` to enable behavior before [#48727](https://github.com/ClickHouse/ClickHouse/issues/48727). [#49101](https://github.com/ClickHouse/ClickHouse/pull/49101) ([Igor Nikonov](https://github.com/devcrafter)). + +#### New Feature +* Add `extractKeyValuePairs` function to extract key value pairs from strings. Input strings might contain noise (i.e log files / do not need to be 100% formatted in key-value-pair format), the algorithm will look for key value pairs matching the arguments passed to the function. As of now, function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)). +* Add MemoryTracker for the background tasks (merges and mutation). 
Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached, ClickHouse won't schedule new merge or mutation tasks. Also, the `MergesMutationsMemoryTracking` metric is introduced to allow observing the current memory usage of background tasks. Closes [#45710](https://github.com/ClickHouse/ClickHouse/issues/45710). [#46089](https://github.com/ClickHouse/ClickHouse/pull/46089) ([Dmitry Novik](https://github.com/novikd)). +* Support new aggregate functions quantileGK/quantilesGK, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in Spark. For the Greenwald-Khanna algorithm, refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for the `SYSTEM SYNC REPLICA` query. The `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). The `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add the kafkaMurmurHash function for compatibility with the Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add the statistical aggregate function `kolmogorovSmirnovTest`. Closes [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. The value is stored in ZooKeeper and can be used instead of the non-persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add the soundex function. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support map type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)). +* Add the PrettyJSONEachRow format to output pretty JSON with newline delimiters and 4-space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the ParquetMetadata input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Reading files in Parquet format is now much faster. 
IO and decoding are parallelized (controlled by the `max_threads` setting), and only the required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)). +* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)). +* Now an internal query to the local replica is sent explicitly and data from it is received through the loopback interface. The setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating the reading process and merging results, continuously answering requests while all the secondary queries read the data. Note: using the loopback interface is not the most performant option, but otherwise some replicas could starve for tasks, which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even lazier. All incoming requests contain the information about the reading algorithm; we initialize the coordinator with it when the first request comes. If any replica decides to read with a different algorithm, an exception will be thrown and the query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not build a set for the right side of an `IN` clause with a subquery when it is used only for the analysis of skip indexes and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)). +* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). +* Query processing is parallelized right after reading from a data source. Affected data sources are mostly simple or external storages like the table functions `url` and `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). +* Use the correct memory order for the counter in `numbers_mt()`. [#48729](https://github.com/ClickHouse/ClickHouse/pull/48729) ([Igor Nikonov](https://github.com/devcrafter)). +* Lowered contention on the ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)). +* Simplify accounting of the approximate size of a granule in the prefetched read pool. [#49051](https://github.com/ClickHouse/ClickHouse/pull/49051) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)). +* Many issues in ClickHouse applications' help were fixed. Help is now written to stdout from all tools. 
Status code for `clickhouse help` invocation is now 0. Updated help for `clickhouse-local`, `clickhouse-benchmark`, `clickhouse-client`, `clickhouse hash`, `clickhouse su`, `clickhouse-install`. [#45819](https://github.com/ClickHouse/ClickHouse/pull/45819) ([Ilya Yatsishin](https://github.com/qoega)). +* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)). +* It is possible to set a _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)). +* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). +* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts, then for each part a set for the subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts, this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently, it can look up the set in the cache, wait for it to be built and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)). +* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)). +* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using the setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* Several improvements around data lakes: - Make StorageIceberg work with non-partitioned data. 
- Support Iceberg format version V2 (previously only V1 was supported) - Support reading partitioned data for DeltaLake/Hudi - Faster reading of DeltaLake metadata by using Delta's checkpoint files - Fixed incorrect Hudi reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small size tables - Made these engines to pickup updates of changed data (previously the state was set on table creation) - Make proper testing for Iceberg/DeltaLake/Hudi using spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)). +* Currently, the JSON_VALUE function is similar as spark's get_json_object function, which support to get value from json string by a path like '$.key'. But still has something different - 1. in spark's get_json_object will return null while the path is not exist, but in JSON_VALUE will return empty string; - 2. in spark's get_json_object will return a complext type value, such as a json object/array value, but in JSON_VALUE will return empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add CNF/constraint optimizer in new analyzer. [#47617](https://github.com/ClickHouse/ClickHouse/pull/47617) ([Antonio Andelic](https://github.com/antonio2368)). +* For use_structure_from_insertion_table_in_table_functions more flexible insert table structure propagation to table function. Fixed bug with name mapping and using virtual columns. No more need for 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Do not continue retrying to connect to ZK if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)). +* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)). +* Support Enum output/input in BSONEachRow, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)). +* Support more ClickHouse types in ORC/Arrow/Parquet formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32 and we couldn't read it back), fix reading Nullable(IPv6) from binary data for ORC. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)). +* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)). +* Added support for `BACKUP ALL` command which backups all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)). +* Function mapFromArrays support map type as input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)). 
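The `Map` helpers listed above (`mapConcat`, `mapSort`, `mapExists`, and `mapFromArrays`, which now also accepts a map argument) behave much like ordinary dictionary operations. The following plain-Python sketch is only an analogy of the semantics those names suggest, not ClickHouse code; in particular, the assumption that later maps win on duplicate keys in `mapConcat` is mine, not taken from the entries above.

```python
# Plain-Python analogy of the Map helpers (illustrative only, not ClickHouse code).
keys, values = ["x", "y", "z"], [1, 2, 3]

map_from_arrays = dict(zip(keys, values))          # mapFromArrays(['x','y','z'], [1,2,3])
map_concat = {**map_from_arrays, **{"w": 0}}       # mapConcat(...): merge maps (duplicate-key
                                                   # resolution here is an assumption)
map_sort = dict(sorted(map_concat.items()))        # mapSort(...): entries ordered by key
map_exists = any(k == "y" and v == 2               # mapExists(...): does any entry
                 for k, v in map_concat.items())   # satisfy the predicate?

print(map_from_arrays, map_concat, map_sort, map_exists, sep="\n")
```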
+* The output of some SHOW PROCESSLIST is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)). +* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`, settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)). +* Support more types in CapnProto format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)). +* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)). +* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)). +* Check that the primary key type for a simple dictionary is a native unsigned integer type. Add setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)). +* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow writing/reading an unnamed tuple as a nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)). +* parseDateTime() now understands the format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)). +* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (single zero) can be restored using the setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)). +* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. 
Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)). +* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). +* Just fix small typo in comment around `lockForAlter` method in `IStorage.h`. [#48559](https://github.com/ClickHouse/ClickHouse/pull/48559) ([artem-pershin](https://github.com/artem-pershin)). +* Add support for `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed in runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). +* HTTP temporary buffer support working with fs cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). +* Make Schema inference works for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). +* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)). +* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)). +* Fix some mysql related settings not being handled with mysql dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). 
[#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix squashing in query cache. [#48763](https://github.com/ClickHouse/ClickHouse/pull/48763) ([Robert Schulze](https://github.com/rschu1ze)). +* Support the following new JSONPath formats: '$.1key' (path element begins with a number); '$[key]', '$[“key”]', '$[\\\'key\\\']', '$["key 123"]' (path element enclosed in []). [#48768](https://github.com/ClickHouse/ClickHouse/pull/48768) ([lgbo](https://github.com/lgbo-ustc)). +* If a user sets `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Not for changelog. [#48824](https://github.com/ClickHouse/ClickHouse/pull/48824) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix data race in `StorageRabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add aliases `name` and `part_name` for `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)). +* Functions "arrayDifference()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)). +* Multi-line history in clickhouse-client is no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)). +* Not for changelog. [#48873](https://github.com/ClickHouse/ClickHouse/pull/48873) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We have additionally worked around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)). +* Improve memory accounting for prefetches. Randomise prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--`; this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). +* Add fallback to password authentication when authentication with SSL user certificate has failed. 
Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229) . Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve the embedded dashboard. Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `bitCount` function support `FixedString` and `String` data type. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)). +* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or MacOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The exception message about the unparsed query parameter will also tell about the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added field `rows` with number of rows parsed from asynchronous insert to `system.asynchronous_insert_log`. [#49120](https://github.com/ClickHouse/ClickHouse/pull/49120) ([Anton Popov](https://github.com/CurtizJ)). +* 1. Bump Intel QPL from v1.0.0 to v1.1.0 (fixes build issue [#47877](https://github.com/ClickHouse/ClickHouse/issues/47877)) 2. the DEFLATE_QPL codec now respects the maximum hardware jobs returned by libaccel_config. [#49126](https://github.com/ClickHouse/ClickHouse/pull/49126) ([jasperzhu](https://github.com/jinjunzh)). + +#### Build/Testing/Packaging Improvement +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* With the current approach, all ports are calculated at the beginning and could overlap or even be highjacked, see [the report](https://s3.amazonaws.com/clickhouse-test-reports/46793/02928ae50c52f31ce8e5bfa99eb1b5db046f4a4f/integration_tests__release__[1/2]/integration_run_parallel8_0.log) for `port is already allocated`. It's possibly the reason for [#45368](https://github.com/ClickHouse/ClickHouse/issues/45368). [#48393](https://github.com/ClickHouse/ClickHouse/pull/48393) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
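The port-collision note above ([#48393]) describes the classic pitfall of pre-computing port numbers for test services. A common way to avoid it is to let the operating system hand out a currently free port at bind time; the sketch below shows that general technique only (the `pick_free_port` helper is a hypothetical name) and is not taken from the referenced PR.

```python
import socket

def pick_free_port() -> int:
    """Ask the OS for a currently unused TCP port instead of pre-computing one."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(("127.0.0.1", 0))   # port 0 = "kernel, pick any free port for me"
        return s.getsockname()[1]

# Note: there is still a small race window between closing this socket and the
# test service binding the port, so real harnesses usually retry on failure.
print(pick_free_port())
```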
+* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). +* Not for changelog. [#48879](https://github.com/ClickHouse/ClickHouse/pull/48879) ([larryluogit](https://github.com/larryluogit)). +* After the recent update, the `dockerd` requires `--tlsverify=false` together with the HTTP port specified explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Run more functional tests concurrently. [#48970](https://github.com/ClickHouse/ClickHouse/pull/48970) ([alesapin](https://github.com/alesapin)). +* Fix glibc compatibility check: replace `preadv` from musl. [#49144](https://github.com/ClickHouse/ClickHouse/pull/49144) ([alesapin](https://github.com/alesapin)). +* Use position-independent code for sanitizer builds (at least msan :D) to avoid issues with the maximum relocation size. [#49145](https://github.com/ClickHouse/ClickHouse/pull/49145) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor highlighting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). +* Fix crash when uploading parts whose size is greater than INT_MAX to S3 [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in sparkbar function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in StorageS3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). 
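For context on the "CRC32 checksum in HDFS" entry above: a CRC32 is a 32-bit polynomial checksum over a byte stream, as in the minimal Python reference below. Libraries such as ISA-L (vendored earlier in this diff, with its CRC assembly routines) provide SIMD-accelerated implementations of this family of checksums. The snippet is illustrative only and is not the HDFS integration itself.

```python
import zlib

data = b"ClickHouse"
# zlib.crc32 computes the standard (gzip/zlib) CRC-32; mask to keep it unsigned 32-bit.
checksum = zlib.crc32(data) & 0xFFFFFFFF
print(f"crc32({data!r}) = {checksum:#010x}")
```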
+* Fix ThreadPool for DistributedSink and use StrongTypedef for CurrentMetrics/ProfileEvents/StatusInfo to avoid further errors [#48314](https://github.com/ClickHouse/ClickHouse/pull/48314) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Check node for Backup Restore concurrency [#48342](https://github.com/ClickHouse/ClickHouse/pull/48342) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* close client [#48347](https://github.com/ClickHouse/ClickHouse/pull/48347) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update only affected rows in KV storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* toTimeZone function throw an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* "changed" flag in system.settings is calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in client reconnect [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). +* Avoid sending `nullptr` to `memcpy` in `copyStringInArena` [#48532](https://github.com/ClickHouse/ClickHouse/pull/48532) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). 
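The "serializing LowCardinality as Arrow dictionary" fix above concerns mapping ClickHouse's `LowCardinality` columns onto Arrow's dictionary encoding: a small array of unique values plus integer indices into it. The sketch below (assuming the third-party `pyarrow` package is installed) only shows what that Arrow representation looks like; it is not part of the fix.

```python
import pyarrow as pa

# A low-cardinality string column...
arr = pa.array(["red", "green", "red", "blue", "red"])

# ...in Arrow's dictionary encoding: unique values + integer indices into them.
dict_arr = arr.dictionary_encode()
print(dict_arr.type)        # dictionary type (string values, integer indices)
print(dict_arr.dictionary)  # the unique values, e.g. ["red", "green", "blue"]
print(dict_arr.indices)     # per-row indices into the dictionary
```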
+* `groupArray` returns cannot be nullable [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix async inserts with empty data [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* Fix zero-copy-replication on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix skip_unavailable_shards in case of unavailable hosts [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). +* Fix key condition on duplicate primary keys [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). +* Fix for race in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). +* Fix unexpected part name error when trying to drop a ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading Date32 Parquet/Arrow column into not Date32 column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix UNKNOWN_IDENTIFIER error while select from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregate empty string error [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). +* Fix postgres database setting [#49100](https://github.com/ClickHouse/ClickHouse/pull/49100) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_cache_with_full_disk_space [#49110](https://github.com/ClickHouse/ClickHouse/pull/49110) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix "prepared statement insert already exists" [#49154](https://github.com/ClickHouse/ClickHouse/pull/49154) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix replace[Regexp]{One,All}() with const haystacks [#49220](https://github.com/ClickHouse/ClickHouse/pull/49220) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build Improvement + +* Fixed hashing issue in creating partition IDs for s390x. [#48134](https://github.com/ClickHouse/ClickHouse/pull/48134) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Randomize JIT settings in tests"'. [#48277](https://github.com/ClickHouse/ClickHouse/pull/48277) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Fix test "02494_query_cache_drop.sql"'. [#48358](https://github.com/ClickHouse/ClickHouse/pull/48358) ([Anton Popov](https://github.com/CurtizJ)). 
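The s390x partition-ID hashing fix listed above ([#48134]) is, at heart, a byte-order problem: s390x is big-endian, so hashing the raw in-memory bytes of an integer yields a different digest than on little-endian x86_64 or aarch64. The sketch below only illustrates that general class of bug, with `sha1` standing in for whatever hash the real code uses; it is not the actual fix.

```python
import hashlib
import struct

value = 20230426

# The same integer, serialized little-endian (x86_64, aarch64) vs big-endian (s390x).
le_bytes = struct.pack("<q", value)
be_bytes = struct.pack(">q", value)

# Hashing the raw memory layout gives platform-dependent digests...
print(hashlib.sha1(le_bytes).hexdigest())
print(hashlib.sha1(be_bytes).hexdigest())

# ...while hashing an explicitly chosen byte order keeps identifiers stable everywhere.
print(hashlib.sha1(struct.pack("<q", value)).hexdigest())
```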
+* NO CL ENTRY: 'Revert "Check simple dictionary key is native unsigned integer"'. [#48732](https://github.com/ClickHouse/ClickHouse/pull/48732) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Make Schema inference works for CREATE AS SELECT"'. [#48758](https://github.com/ClickHouse/ClickHouse/pull/48758) ([pufit](https://github.com/pufit)). +* NO CL ENTRY: 'Revert "Add MemoryTracker for the background tasks"'. [#48760](https://github.com/ClickHouse/ClickHouse/pull/48760) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Added tests for ClickHouse apps help and fixed help issues"'. [#48991](https://github.com/ClickHouse/ClickHouse/pull/48991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Adapt marks count for prefetch read pool"'. [#49068](https://github.com/ClickHouse/ClickHouse/pull/49068) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* merge and mutation make thread group for setting memory trackers right [#47104](https://github.com/ClickHouse/ClickHouse/pull/47104) ([Sema Checherinda](https://github.com/CheSema)). +* Query plan: update sort description [#47319](https://github.com/ClickHouse/ClickHouse/pull/47319) ([Igor Nikonov](https://github.com/devcrafter)). +* Sqllogic [#47784](https://github.com/ClickHouse/ClickHouse/pull/47784) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race between DROP MatView and RESTART REPLICAS [#47863](https://github.com/ClickHouse/ClickHouse/pull/47863) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merge [#35113](https://github.com/ClickHouse/ClickHouse/issues/35113) [#47934](https://github.com/ClickHouse/ClickHouse/pull/47934) ([Antonio Andelic](https://github.com/antonio2368)). +* Add a test for ClientInfo initial_query_start_time in inter-server mode [#48036](https://github.com/ClickHouse/ClickHouse/pull/48036) ([Azat Khuzhin](https://github.com/azat)). +* Make custom key for parallel replicas work in new analyzer [#48054](https://github.com/ClickHouse/ClickHouse/pull/48054) ([Antonio Andelic](https://github.com/antonio2368)). +* throw exception while non-parametric functions having parameters [#48115](https://github.com/ClickHouse/ClickHouse/pull/48115) ([save-my-heart](https://github.com/save-my-heart)). +* Move FunctionsJSON implementation to header file [#48142](https://github.com/ClickHouse/ClickHouse/pull/48142) ([DimasKovas](https://github.com/DimasKovas)). +* Use ThreadPool in PipelineExecutor [#48146](https://github.com/ClickHouse/ClickHouse/pull/48146) ([Azat Khuzhin](https://github.com/azat)). +* Add sanity checks for writing number in variable length format (resubmit) [#48154](https://github.com/ClickHouse/ClickHouse/pull/48154) ([Azat Khuzhin](https://github.com/azat)). +* Try fix 02151_hash_table_sizes_stats.sh test [#48178](https://github.com/ClickHouse/ClickHouse/pull/48178) ([Nikita Taranov](https://github.com/nickitat)). +* Add scripts for sparse checkout of some contribs [#48183](https://github.com/ClickHouse/ClickHouse/pull/48183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not take lock for shared context in setTempDataOnDisk [#48219](https://github.com/ClickHouse/ClickHouse/pull/48219) ([Vladimir C](https://github.com/vdimir)). +* parseDateTime[InJodaSyntax](): Require format argument [#48222](https://github.com/ClickHouse/ClickHouse/pull/48222) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Do not partially cancel processors added from expand pipeline. [#48231](https://github.com/ClickHouse/ClickHouse/pull/48231) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some tests [#48267](https://github.com/ClickHouse/ClickHouse/pull/48267) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix compiling examples without Hive [#48269](https://github.com/ClickHouse/ClickHouse/pull/48269) ([Azat Khuzhin](https://github.com/azat)). +* In messages, put values into quotes [#48271](https://github.com/ClickHouse/ClickHouse/pull/48271) ([Vadim Chekan](https://github.com/vchekan)). +* Fix 01710_projection_optimize_materialize flakiness [#48276](https://github.com/ClickHouse/ClickHouse/pull/48276) ([Azat Khuzhin](https://github.com/azat)). +* Fix UB (signed integer overflow) in StorageMergeTree::backupData() [#48278](https://github.com/ClickHouse/ClickHouse/pull/48278) ([Azat Khuzhin](https://github.com/azat)). +* Update version after release [#48279](https://github.com/ClickHouse/ClickHouse/pull/48279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.3.1.2823-lts [#48281](https://github.com/ClickHouse/ClickHouse/pull/48281) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Small follow-up to [#48017](https://github.com/ClickHouse/ClickHouse/issues/48017) [#48292](https://github.com/ClickHouse/ClickHouse/pull/48292) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to update arrow library to release 11.0.0 [#48294](https://github.com/ClickHouse/ClickHouse/pull/48294) ([Kruglov Pavel](https://github.com/Avogar)). +* fix test numbers again 2 [#48295](https://github.com/ClickHouse/ClickHouse/pull/48295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix: copy forgotten show_secrets in FormatSettings semi-copy-ctor [#48297](https://github.com/ClickHouse/ClickHouse/pull/48297) ([Natasha Murashkina](https://github.com/murfel)). +* Do not remove inputs from maybe compiled DAG. [#48303](https://github.com/ClickHouse/ClickHouse/pull/48303) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v22.3.20.29-lts [#48304](https://github.com/ClickHouse/ClickHouse/pull/48304) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.12.6.22-stable, v22.3.20.29-lts [#48305](https://github.com/ClickHouse/ClickHouse/pull/48305) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Merging [#46323](https://github.com/ClickHouse/ClickHouse/issues/46323) [#48312](https://github.com/ClickHouse/ClickHouse/pull/48312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Follow-up to [#47863](https://github.com/ClickHouse/ClickHouse/issues/47863) [#48315](https://github.com/ClickHouse/ClickHouse/pull/48315) ([Alexander Tokmakov](https://github.com/tavplubix)). +* test / some complex query (it fails with analyzer enabled) [#48324](https://github.com/ClickHouse/ClickHouse/pull/48324) ([Denny Crane](https://github.com/den-crane)). +* Fix constraints after merge [#48328](https://github.com/ClickHouse/ClickHouse/pull/48328) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add logging for concurrency checks for backups [#48337](https://github.com/ClickHouse/ClickHouse/pull/48337) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Update version_date.tsv and changelogs after v23.1.6.42-stable [#48345](https://github.com/ClickHouse/ClickHouse/pull/48345) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.2.5.46-stable [#48346](https://github.com/ClickHouse/ClickHouse/pull/48346) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix lambda type resolution [#48355](https://github.com/ClickHouse/ClickHouse/pull/48355) ([Dmitry Novik](https://github.com/novikd)). +* Avoid abort in protobuf library in debug build [#48356](https://github.com/ClickHouse/ClickHouse/pull/48356) ([Kruglov Pavel](https://github.com/Avogar)). +* Batch fix for projections analysis with analyzer. [#48357](https://github.com/ClickHouse/ClickHouse/pull/48357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix tests with explain and analyzer where names changed. [#48360](https://github.com/ClickHouse/ClickHouse/pull/48360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Small follow-up to [#45912](https://github.com/ClickHouse/ClickHouse/issues/45912) [#48373](https://github.com/ClickHouse/ClickHouse/pull/48373) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v22.8.16.32-lts [#48376](https://github.com/ClickHouse/ClickHouse/pull/48376) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Add script for a slack bot that reports broken tests [#48382](https://github.com/ClickHouse/ClickHouse/pull/48382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky `test_keeper_mntr_data_size` [#48384](https://github.com/ClickHouse/ClickHouse/pull/48384) ([Antonio Andelic](https://github.com/antonio2368)). +* WITH FILL clarification and cleanup [#48395](https://github.com/ClickHouse/ClickHouse/pull/48395) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup mess in .clang-tidy [#48396](https://github.com/ClickHouse/ClickHouse/pull/48396) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test_backup_all [#48400](https://github.com/ClickHouse/ClickHouse/pull/48400) ([Vitaly Baranov](https://github.com/vitlibar)). +* Find big allocations without memory limits checks [#48401](https://github.com/ClickHouse/ClickHouse/pull/48401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix issue with krb5 and building w/ OpenSSL [#48407](https://github.com/ClickHouse/ClickHouse/pull/48407) ([Boris Kuschel](https://github.com/bkuschel)). +* Make CI slack bot less noisy [#48409](https://github.com/ClickHouse/ClickHouse/pull/48409) ([Alexander Tokmakov](https://github.com/tavplubix)). +* AST fuzzer: Fix assertion in TopK serialization [#48412](https://github.com/ClickHouse/ClickHouse/pull/48412) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible flakiness of lightweight delete tests (due to index granularity randomization) [#48413](https://github.com/ClickHouse/ClickHouse/pull/48413) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `test_keeper_snapshots` [#48417](https://github.com/ClickHouse/ClickHouse/pull/48417) ([Antonio Andelic](https://github.com/antonio2368)). +* Update sort desc: more efficient original node search in ActionsDAG [#48427](https://github.com/ClickHouse/ClickHouse/pull/48427) ([Igor Nikonov](https://github.com/devcrafter)). +* test for [#16399](https://github.com/ClickHouse/ClickHouse/issues/16399) [#48439](https://github.com/ClickHouse/ClickHouse/pull/48439) ([Denny Crane](https://github.com/den-crane)). 
+* Better exception messages from Keeper client [#48444](https://github.com/ClickHouse/ClickHouse/pull/48444) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small documentation follow-up to [#47246](https://github.com/ClickHouse/ClickHouse/issues/47246) [#48463](https://github.com/ClickHouse/ClickHouse/pull/48463) ([Robert Schulze](https://github.com/rschu1ze)). +* Update 00002_log_and_exception_messages_formatting.sql [#48467](https://github.com/ClickHouse/ClickHouse/pull/48467) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid operation on uninitialised data in readDateTimeTextImpl [#48472](https://github.com/ClickHouse/ClickHouse/pull/48472) ([Kruglov Pavel](https://github.com/Avogar)). +* Add reading step for system zookeeper. Analyze path from filter DAG. [#48485](https://github.com/ClickHouse/ClickHouse/pull/48485) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock due to debug tracking of memory allocations [#48487](https://github.com/ClickHouse/ClickHouse/pull/48487) ([Azat Khuzhin](https://github.com/azat)). +* Register datediff and trim aliases in system.functions [#48489](https://github.com/ClickHouse/ClickHouse/pull/48489) ([Robert Schulze](https://github.com/rschu1ze)). +* Change error code [#48490](https://github.com/ClickHouse/ClickHouse/pull/48490) ([Anton Popov](https://github.com/CurtizJ)). +* Update 00002_log_and_exception_messages_formatting.sql [#48499](https://github.com/ClickHouse/ClickHouse/pull/48499) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix query cache with sparse columns [#48500](https://github.com/ClickHouse/ClickHouse/pull/48500) ([Anton Popov](https://github.com/CurtizJ)). +* Use std::string_view to get rid of strlen [#48509](https://github.com/ClickHouse/ClickHouse/pull/48509) ([ltrk2](https://github.com/ltrk2)). +* Fix bytesSize() of zk SetRequest [#48512](https://github.com/ClickHouse/ClickHouse/pull/48512) ([Sergei Trifonov](https://github.com/serxa)). +* Remove dead code and unused dependencies [#48518](https://github.com/ClickHouse/ClickHouse/pull/48518) ([ltrk2](https://github.com/ltrk2)). +* Use forward declaration of ThreadPool [#48519](https://github.com/ClickHouse/ClickHouse/pull/48519) ([Azat Khuzhin](https://github.com/azat)). +* Use std::string_view instead of strlen [#48520](https://github.com/ClickHouse/ClickHouse/pull/48520) ([ltrk2](https://github.com/ltrk2)). +* Use std::string::starts_with instead of a roll your own variant [#48521](https://github.com/ClickHouse/ClickHouse/pull/48521) ([ltrk2](https://github.com/ltrk2)). +* Fix flaky `test_alternative_keeper_config` [#48533](https://github.com/ClickHouse/ClickHouse/pull/48533) ([Antonio Andelic](https://github.com/antonio2368)). +* Use one ThreadGroup while pushing to materialized views (and some refactoring for ThreadGroup) [#48543](https://github.com/ClickHouse/ClickHouse/pull/48543) ([Azat Khuzhin](https://github.com/azat)). +* Fix some tests [#48550](https://github.com/ClickHouse/ClickHouse/pull/48550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 02477_projection_materialize_and_zero_copy flakiness (due to index granularity randomization) [#48551](https://github.com/ClickHouse/ClickHouse/pull/48551) ([Azat Khuzhin](https://github.com/azat)). +* Better exception message for ZSTD [#48552](https://github.com/ClickHouse/ClickHouse/pull/48552) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove misleading comment and block [#48562](https://github.com/ClickHouse/ClickHouse/pull/48562) ([Sergei Trifonov](https://github.com/serxa)). +* Update 02207_allow_plaintext_and_no_password.sh [#48566](https://github.com/ClickHouse/ClickHouse/pull/48566) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* bugfix: compare Bits and sizeof(Arithmetic) * 8 [#48569](https://github.com/ClickHouse/ClickHouse/pull/48569) ([caipengxiang](https://github.com/awfeequdng)). +* Remove superfluous includes of logger_userful.h from headers [#48570](https://github.com/ClickHouse/ClickHouse/pull/48570) ([Azat Khuzhin](https://github.com/azat)). +* Remove slow test from debug builds [#48574](https://github.com/ClickHouse/ClickHouse/pull/48574) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't use type conversion with String query parameters [#48577](https://github.com/ClickHouse/ClickHouse/pull/48577) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix TSan report in Kerberos [#48579](https://github.com/ClickHouse/ClickHouse/pull/48579) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add second_deadlock_stack=1 for TSan on CI and fix some lock-order-inversion problems [#48596](https://github.com/ClickHouse/ClickHouse/pull/48596) ([Azat Khuzhin](https://github.com/azat)). +* Fix LOGICAL_ERROR in executable table function [#48605](https://github.com/ClickHouse/ClickHouse/pull/48605) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flakiness of test_store_cleanup in case of image rebuild [#48610](https://github.com/ClickHouse/ClickHouse/pull/48610) ([Azat Khuzhin](https://github.com/azat)). +* Remove strange code [#48612](https://github.com/ClickHouse/ClickHouse/pull/48612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor refactoring of formatDateTime() [#48627](https://github.com/ClickHouse/ClickHouse/pull/48627) ([Robert Schulze](https://github.com/rschu1ze)). +* Better handling of values too large for VarInt encoding [#48628](https://github.com/ClickHouse/ClickHouse/pull/48628) ([Robert Schulze](https://github.com/rschu1ze)). +* refine some messages of exception in regexp tree [#48632](https://github.com/ClickHouse/ClickHouse/pull/48632) ([Han Fei](https://github.com/hanfei1991)). +* Partially revert e0252db8d and fix pr-bugfix labeling [#48637](https://github.com/ClickHouse/ClickHouse/pull/48637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix build src/Interpreters/InterpreterInsertQuery.h [#48638](https://github.com/ClickHouse/ClickHouse/pull/48638) ([Vladimir C](https://github.com/vdimir)). +* Fix build ThreadGroupPtr [#48641](https://github.com/ClickHouse/ClickHouse/pull/48641) ([Vladimir C](https://github.com/vdimir)). +* Fix flaky test test_drop_replica_and_achieve_quorum [#48642](https://github.com/ClickHouse/ClickHouse/pull/48642) ([Kruglov Pavel](https://github.com/Avogar)). +* fix 02504_regexp_dictionary_table_source [#48662](https://github.com/ClickHouse/ClickHouse/pull/48662) ([Han Fei](https://github.com/hanfei1991)). +* Remove strange code from MutateTask [#48666](https://github.com/ClickHouse/ClickHouse/pull/48666) ([alesapin](https://github.com/alesapin)). +* SonarCloud: C++ Reporting Standards [#48668](https://github.com/ClickHouse/ClickHouse/pull/48668) ([Julio Jimenez](https://github.com/juliojimenez)). 
+* Remove lock for duplicated parts UUIDs (allow_experimental_query_deduplication=1) [#48670](https://github.com/ClickHouse/ClickHouse/pull/48670) ([Azat Khuzhin](https://github.com/azat)). +* show result of minio listings for test test_attach_detach_partition [#48674](https://github.com/ClickHouse/ClickHouse/pull/48674) ([Sema Checherinda](https://github.com/CheSema)). +* Fix tests for analyzer [#48675](https://github.com/ClickHouse/ClickHouse/pull/48675) ([Igor Nikonov](https://github.com/devcrafter)). +* Call IProcessor::onCancel() once [#48687](https://github.com/ClickHouse/ClickHouse/pull/48687) ([Igor Nikonov](https://github.com/devcrafter)). +* Update MergeTree syntax for optional index granularity argument [#48692](https://github.com/ClickHouse/ClickHouse/pull/48692) ([Robert Schulze](https://github.com/rschu1ze)). +* Add test for old bug [#7826](https://github.com/ClickHouse/ClickHouse/issues/7826) [#48697](https://github.com/ClickHouse/ClickHouse/pull/48697) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky `test_keeper_session` [#48699](https://github.com/ClickHouse/ClickHouse/pull/48699) ([Antonio Andelic](https://github.com/antonio2368)). +* Better messages formatting in the CI Slack bot [#48712](https://github.com/ClickHouse/ClickHouse/pull/48712) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add trusted contributors [#48715](https://github.com/ClickHouse/ClickHouse/pull/48715) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Do not remove broken detached parts on startup [#48730](https://github.com/ClickHouse/ClickHouse/pull/48730) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove `-Wshadow` suppression which leaked into global namespace [#48737](https://github.com/ClickHouse/ClickHouse/pull/48737) ([Robert Schulze](https://github.com/rschu1ze)). +* VarInt coding: Always perform sanity check [#48740](https://github.com/ClickHouse/ClickHouse/pull/48740) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to fix flaky 02455_one_row_from_csv_memory_usage [#48756](https://github.com/ClickHouse/ClickHouse/pull/48756) ([Dmitry Novik](https://github.com/novikd)). +* insert UInt32 Hashvalue in reverse order on big endian machine [#48764](https://github.com/ClickHouse/ClickHouse/pull/48764) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Limit size of messages from the CI slack bot [#48766](https://github.com/ClickHouse/ClickHouse/pull/48766) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update README.md [#48776](https://github.com/ClickHouse/ClickHouse/pull/48776) ([Tyler Hannan](https://github.com/tylerhannan)). +* Remove duplicate definition of SingleEndpointHTTPSessionPool [#48779](https://github.com/ClickHouse/ClickHouse/pull/48779) ([JaySon](https://github.com/JaySon-Huang)). +* Fix flaky test_version_update_after_mutation/test.py::test_upgrade_while_mutation [#48783](https://github.com/ClickHouse/ClickHouse/pull/48783) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test test_backup_all [#48789](https://github.com/ClickHouse/ClickHouse/pull/48789) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a confusing warning about interserver mode [#48793](https://github.com/ClickHouse/ClickHouse/pull/48793) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Store clusters from ClusterDiscovery in separate map [#48795](https://github.com/ClickHouse/ClickHouse/pull/48795) ([Vladimir C](https://github.com/vdimir)). 
+* Reimplement [#48790](https://github.com/ClickHouse/ClickHouse/issues/48790) [#48797](https://github.com/ClickHouse/ClickHouse/pull/48797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow running integration tests without spark [#48803](https://github.com/ClickHouse/ClickHouse/pull/48803) ([Vitaly Baranov](https://github.com/vitlibar)). +* forbid gwpsan in debug mode to rescue stress tests [#48804](https://github.com/ClickHouse/ClickHouse/pull/48804) ([Han Fei](https://github.com/hanfei1991)). +* Simplify FileCacheFactory [#48805](https://github.com/ClickHouse/ClickHouse/pull/48805) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix double whitespace in exception message [#48815](https://github.com/ClickHouse/ClickHouse/pull/48815) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#38128](https://github.com/ClickHouse/ClickHouse/issues/38128) [#48817](https://github.com/ClickHouse/ClickHouse/pull/48817) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove excessive logging [#48826](https://github.com/ClickHouse/ClickHouse/pull/48826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* remove duplicate indentwith in clang-format [#48834](https://github.com/ClickHouse/ClickHouse/pull/48834) ([cluster](https://github.com/infdahai)). +* Try fix flacky test_concurrent_alter_move_and_drop [#48843](https://github.com/ClickHouse/ClickHouse/pull/48843) ([Sergei Trifonov](https://github.com/serxa)). +* fix the race wait loading parts [#48844](https://github.com/ClickHouse/ClickHouse/pull/48844) ([Sema Checherinda](https://github.com/CheSema)). +* suppress assert of progress for test_system_replicated_fetches [#48856](https://github.com/ClickHouse/ClickHouse/pull/48856) ([Han Fei](https://github.com/hanfei1991)). +* Fix: do not run test_store_cleanup_disk_s3 in parallel [#48863](https://github.com/ClickHouse/ClickHouse/pull/48863) ([Igor Nikonov](https://github.com/devcrafter)). +* Update README.md [#48883](https://github.com/ClickHouse/ClickHouse/pull/48883) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix test reference files for join using nullable column [#48893](https://github.com/ClickHouse/ClickHouse/pull/48893) ([Vladimir C](https://github.com/vdimir)). +* bitNot marked as NO_SANITIZE_UNDEFINED [#48899](https://github.com/ClickHouse/ClickHouse/pull/48899) ([Vladimir C](https://github.com/vdimir)). +* Fix order by in test_storage_delta [#48903](https://github.com/ClickHouse/ClickHouse/pull/48903) ([Vladimir C](https://github.com/vdimir)). +* Fix segfault when set is not built yet [#48904](https://github.com/ClickHouse/ClickHouse/pull/48904) ([Alexander Gololobov](https://github.com/davenger)). +* A non significant change (does not affect anything): add support for signed integers in the maskBits function [#48920](https://github.com/ClickHouse/ClickHouse/pull/48920) ([caipengxiang](https://github.com/awfeequdng)). +* Follow-up to [#48866](https://github.com/ClickHouse/ClickHouse/issues/48866) [#48929](https://github.com/ClickHouse/ClickHouse/pull/48929) ([Robert Schulze](https://github.com/rschu1ze)). +* Un-flake 01079_new_range_reader_segfault [#48934](https://github.com/ClickHouse/ClickHouse/pull/48934) ([Robert Schulze](https://github.com/rschu1ze)). +* Add building stage to the fasttests report, respect existing status on rerun [#48935](https://github.com/ClickHouse/ClickHouse/pull/48935) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Update Settings.h [#48948](https://github.com/ClickHouse/ClickHouse/pull/48948) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update cluster.py [#48949](https://github.com/ClickHouse/ClickHouse/pull/48949) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Docs: Replace annoying three spaces in enumerations by a single space [#48951](https://github.com/ClickHouse/ClickHouse/pull/48951) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky 02706_arrow_different_dictionaries [#48952](https://github.com/ClickHouse/ClickHouse/pull/48952) ([Kruglov Pavel](https://github.com/Avogar)). +* Use default `{replica}`, `{shard}` arguments in Replicated engine [#48961](https://github.com/ClickHouse/ClickHouse/pull/48961) ([Nikolay Degterinsky](https://github.com/evillique)). +* Rename quantileApprox -> quantileGK [#48969](https://github.com/ClickHouse/ClickHouse/pull/48969) ([Vladimir C](https://github.com/vdimir)). +* Don't throw logical error when column is not found in Parquet/Arrow schema [#48987](https://github.com/ClickHouse/ClickHouse/pull/48987) ([Kruglov Pavel](https://github.com/Avogar)). +* Reimplement [#48986](https://github.com/ClickHouse/ClickHouse/issues/48986) [#49005](https://github.com/ClickHouse/ClickHouse/pull/49005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Dont allow bad changelogs [#49006](https://github.com/ClickHouse/ClickHouse/pull/49006) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update README.md [#49007](https://github.com/ClickHouse/ClickHouse/pull/49007) ([Nick-71](https://github.com/Nick-71)). +* Remove outdated test [#49014](https://github.com/ClickHouse/ClickHouse/pull/49014) ([alesapin](https://github.com/alesapin)). +* Fix typo [#49027](https://github.com/ClickHouse/ClickHouse/pull/49027) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix assertion after [#48636](https://github.com/ClickHouse/ClickHouse/issues/48636) [#49029](https://github.com/ClickHouse/ClickHouse/pull/49029) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix build error for big-endian platforms [#49037](https://github.com/ClickHouse/ClickHouse/pull/49037) ([ltrk2](https://github.com/ltrk2)). +* Update version_date.tsv and changelogs after v22.8.17.17-lts [#49046](https://github.com/ClickHouse/ClickHouse/pull/49046) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.1.7.30-stable [#49047](https://github.com/ClickHouse/ClickHouse/pull/49047) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.2.37-lts [#49048](https://github.com/ClickHouse/ClickHouse/pull/49048) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove some code [#49054](https://github.com/ClickHouse/ClickHouse/pull/49054) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some dead code in poco [#49075](https://github.com/ClickHouse/ClickHouse/pull/49075) ([Robert Schulze](https://github.com/rschu1ze)). +* Prevent false positive report by static analyzer [#49078](https://github.com/ClickHouse/ClickHouse/pull/49078) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.2.6.34-stable [#49080](https://github.com/ClickHouse/ClickHouse/pull/49080) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Enforce documentation change for a new-feature PR [#49090](https://github.com/ClickHouse/ClickHouse/pull/49090) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Update clickhouse-test [#49094](https://github.com/ClickHouse/ClickHouse/pull/49094) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable long 02581 in debug, enable with sanitizers [#49105](https://github.com/ClickHouse/ClickHouse/pull/49105) ([Alexander Gololobov](https://github.com/davenger)). +* Fix flaky integration test test_async_query_sending [#49107](https://github.com/ClickHouse/ClickHouse/pull/49107) ([Kruglov Pavel](https://github.com/Avogar)). +* Correct functional test to reflect interoperability [#49108](https://github.com/ClickHouse/ClickHouse/pull/49108) ([ltrk2](https://github.com/ltrk2)). +* Cleanup build guide [#49119](https://github.com/ClickHouse/ClickHouse/pull/49119) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix building iceberg without avro [#49125](https://github.com/ClickHouse/ClickHouse/pull/49125) ([Azat Khuzhin](https://github.com/azat)). +* Add slash for close tag of user_defined_zookeeper_path [#49131](https://github.com/ClickHouse/ClickHouse/pull/49131) ([Hollin](https://github.com/Hooollin)). +* Improve some lambdas [#49133](https://github.com/ClickHouse/ClickHouse/pull/49133) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not randomize prefetch settings for debug build [#49134](https://github.com/ClickHouse/ClickHouse/pull/49134) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't throw LOGICAL_ERROR when reading from remote if there is no local replica [#49136](https://github.com/ClickHouse/ClickHouse/pull/49136) ([Raúl Marín](https://github.com/Algunenano)). +* Docs: Make caption of processors_profile_log page consistent with other pages [#49138](https://github.com/ClickHouse/ClickHouse/pull/49138) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve test reports [#49151](https://github.com/ClickHouse/ClickHouse/pull/49151) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a note regarding private/public repo to logs [#49152](https://github.com/ClickHouse/ClickHouse/pull/49152) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* suppress two timeout tests [#49175](https://github.com/ClickHouse/ClickHouse/pull/49175) ([Han Fei](https://github.com/hanfei1991)). +* Document makeDateTime() and its variants [#49183](https://github.com/ClickHouse/ClickHouse/pull/49183) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix after [#49110](https://github.com/ClickHouse/ClickHouse/issues/49110) [#49206](https://github.com/ClickHouse/ClickHouse/pull/49206) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.4.2.11-stable.md b/docs/changelogs/v23.4.2.11-stable.md new file mode 100644 index 00000000000..3c572b9c1cb --- /dev/null +++ b/docs/changelogs/v23.4.2.11-stable.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.2.11-stable (b6442320f9d) FIXME as compared to v23.4.1.1943-stable (3920eb987f7) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Revert "Fix GCS native copy ([#48981](https://github.com/ClickHouse/ClickHouse/issues/48981))" [#49194](https://github.com/ClickHouse/ClickHouse/pull/49194) ([Raúl Marín](https://github.com/Algunenano)). +* Fix race on Outdated parts loading [#49223](https://github.com/ClickHouse/ClickHouse/pull/49223) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Implement status comment [#48468](https://github.com/ClickHouse/ClickHouse/pull/48468) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update curl to 8.0.1 (for CVEs) [#48765](https://github.com/ClickHouse/ClickHouse/pull/48765) ([Boris Kuschel](https://github.com/bkuschel)). +* Fallback auth gh api [#49314](https://github.com/ClickHouse/ClickHouse/pull/49314) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 6df46d9dec7..a55d44bdf93 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -13,23 +13,23 @@ Supported platforms: - AArch64 - Power9 (experimental) -## Normal Build for Development on Ubuntu +## Building on Ubuntu -The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. +The following tutorial is based on Ubuntu Linux. +With appropriate changes, it should also work on any other Linux distribution. +The minimum recommended Ubuntu version for development is 22.04 LTS. ### Install Prerequisites {#install-prerequisites} ``` bash -sudo apt-get install git cmake ccache python3 ninja-build yasm gawk +sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk ``` -Or cmake3 instead of cmake on older systems. +### Install and Use the Clang compiler -### Install the latest clang (recommended) +On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/). -On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/)) - -```bash +``` bash sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` @@ -40,19 +40,17 @@ sudo apt-get install software-properties-common sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ``` -For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html). +For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html). -#### Use the latest clang for Builds +As of April 2023, any version of Clang >= 15 will work. +GCC as a compiler is not supported +To build with a specific Clang version: ``` bash export CC=clang-15 export CXX=clang++-15 ``` -In this example we use version 15 that is the latest as of Sept 2022. - -Gcc cannot be used. - ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} ``` bash @@ -70,79 +68,46 @@ git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHo ``` bash cd ClickHouse mkdir build -cd build -cmake .. -ninja +cmake -S . -B build +cmake --build build # or: `cd build; ninja` ``` -To create an executable, run `ninja clickhouse`. -This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments. +To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`). +This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments. 
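Once a server started from the freshly built binary is up (for example, `build/programs/clickhouse server` in one terminal and `build/programs/clickhouse client` in another), a minimal smoke test is to ask it for its version and uptime. This query is only an illustrative sanity check, not part of the build procedure itself:

``` sql
-- Run from `clickhouse client` connected to the locally built server:
-- confirms the binary starts, accepts connections, and reports the expected version.
SELECT version() AS server_version, uptime() AS uptime_seconds;
```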
-## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux} +## Building on Any Linux {#how-to-build-clickhouse-on-any-linux} The build requires the following components: -- Git (is used only to checkout the sources, it’s not needed for the build) -- CMake 3.15 or newer +- Git (used to checkout the sources, not needed for the build) +- CMake 3.20 or newer +- Compiler: Clang 15 or newer +- Linker: lld 15 or newer - Ninja -- C++ compiler: clang-15 or newer -- Linker: lld - Yasm - Gawk If all the components are installed, you may build in the same way as the steps above. -Example for Ubuntu Eoan: -``` bash -sudo apt update -sudo apt install git cmake ninja-build clang++ python yasm gawk -git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja -``` - Example for OpenSUSE Tumbleweed: + ``` bash -sudo zypper install git cmake ninja clang-c++ python lld yasm gawk +sudo zypper install git cmake ninja clang-c++ python lld nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja +mkdir build +cmake -S . -B build +cmake --build build ``` Example for Fedora Rawhide: + ``` bash sudo yum update -sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk +sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -make -j $(nproc) -``` - -Here is an example of how to build `clang` and all the llvm infrastructure from sources: - -``` -git clone git@github.com:llvm/llvm-project.git -mkdir llvm-build && cd llvm-build -cmake -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all ../llvm-project/llvm/ -make -j16 -sudo make install -hash clang -clang --version -``` - -You can install the older clang like clang-11 from packages and then use it to build the new clang from sources. - -Here is an example of how to install the new `cmake` from the official website: - -``` -wget https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh -chmod +x cmake-3.22.2-linux-x86_64.sh -./cmake-3.22.2-linux-x86_64.sh -export PATH=/home/milovidov/work/cmake-3.22.2-linux-x86_64/bin/:${PATH} -hash cmake +mkdir build +cmake -S . -B build +cmake --build build ``` ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index cde09d79cd8..cf887a498ea 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -155,6 +155,9 @@ The following settings can be specified in configuration file for given endpoint - `no_sign_request` - Ignore all the credentials so requests are not signed. Useful for accessing public buckets. - `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. 
+- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. +- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). - `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional. - `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. @@ -173,6 +176,9 @@ The following settings can be specified in configuration file for given endpoint + + + diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index d5189d4b9d9..4044087256c 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -439,6 +439,50 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. +Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows: + +```sql +CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); + +CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); + +CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); + +CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] +AS +(number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions)))) + +``` +To use those functions,we need to specify two parameter at least. +For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries: + + +```sql +--- estimate number of bits in the filter +SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; + +┌─size_of_bloom_filter_in_bytes─┐ +│ 10304 │ +└───────────────────────────────┘ + +--- estimate number of hash functions +SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions + +┌─number_of_hash_functions─┐ +│ 13 │ +└──────────────────────────┘ + +``` +Of course, you can also use those functions to estimate parameters by other conditions. +The functions refer to the content [here](https://hur.st/bloomfilter). + + #### Token Bloom Filter The same as `ngrambf_v1`, but stores tokens instead of ngrams. 
Tokens are sequences separated by non-alphanumeric characters. @@ -731,7 +775,13 @@ The names given to the described entities can be found in the system tables, [sy ### Configuration {#table_engine-mergetree-multiple-volumes_configure} -Disks, volumes and storage policies should be declared inside the `` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory. +Disks, volumes and storage policies should be declared inside the `` tag either in a file in the `config.d` directory. + +:::tip +Disks can also be declared in the `SETTINGS` section of a query. This is useful +for adhoc analysis to temporarily attach a disk that is, for example, hosted at a URL. +See [dynamic storage](#dynamic-storage) for more details. +::: Configuration structure: @@ -876,6 +926,87 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting. +### Dynamic Storage + +This example query shows how to attach a table stored at a URL and configure the +remote storage within the query. The web storage is not configured in the ClickHouse +configuration files; all the settings are in the CREATE/ATTACH query. + +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. +::: + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` + +### Nested Dynamic Storage + +This example query builds on the above dynamic disk configuration and shows how to +use a local disk to cache data from a table stored at a URL. Neither the cache disk +nor the web storage is configured in the ClickHouse configuration files; both are +configured in the CREATE/ATTACH query settings. + +In the settings highlighted below notice that the disk of `type=web` is nested within +the disk of `type=cache`. 
+ +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end +``` + ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -924,7 +1055,11 @@ Configuration markup: your_access_key_id your_secret_access_key +
Authorization: Bearer SOME-TOKEN
your_base64_encoded_customer_key + your_kms_key_id + your_kms_encryption_context + true http://proxy1 http://proxy2 @@ -975,7 +1110,11 @@ Optional parameters: - `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. - `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. +- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. +- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). - `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 4ebaaf8a82d..9bf4a465962 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -119,7 +119,7 @@ When processing a query, the client shows: 1. Progress, which is updated no more than 10 times per second (by default). For quick queries, the progress might not have time to be displayed. 2. The formatted query after parsing, for debugging. 3. The result in the specified format. -4. The number of lines in the result, the time passed, and the average speed of query processing. +4. The number of lines in the result, the time passed, and the average speed of query processing. All data amounts refer to uncompressed data. You can cancel a long query by pressing Ctrl+C. However, you will still need to wait for a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you do not wait and press Ctrl+C a second time, the client will exit. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 02145a2fb6c..267f37fd075 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1324,7 +1324,7 @@ The trailing slash is mandatory. /var/lib/clickhouse/ ``` -## prometheus {#server_configuration_parameters-prometheus} +## Prometheus {#server_configuration_parameters-prometheus} Exposing metrics data for scraping from [Prometheus](https://prometheus.io). 
@@ -1339,13 +1339,25 @@ Settings: **Example** ``` xml - - /metrics - 9363 - true - true - true - + + 0.0.0.0 + 8123 + 9000 + + + /metrics + 9363 + true + true + true + + + +``` + +Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server): +```bash +curl 127.0.0.1:9363/metrics ``` ## query_log {#server_configuration_parameters-query-log} @@ -2056,3 +2068,20 @@ Possible values: - Positive integer. Default value: `10000`. + +## display_secrets_in_show_and_select {#display_secrets_in_show_and_select} + +Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, +table functions, and dictionaries. + +User wishing to see secrets must also have +[`format_display_secrets_in_show_and_select` format setting](../settings/formats#format_display_secrets_in_show_and_select) +turned on and a +[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#grant-display-secrets) privilege. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 83ef46053a4..1895a79cd3e 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -40,7 +40,7 @@ If the user tries to violate the constraints an exception is thrown and the sett There are supported few types of constraints: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`. The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows user to change the setting within `min`/`max` range even if `readonly` setting is set to 1, otherwise settings are not allow to be changed in `readonly=1` mode. Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled: ``` xml - true + true ``` diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index ef4bbeeba89..3b87b829c92 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -7,6 +7,23 @@ toc_max_heading_level: 2 # Format settings {#format-settings} +## format_display_secrets_in_show_and_select {#format_display_secrets_in_show_and_select} + +Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases, +table functions, and dictionaries. + +User wishing to see secrets must also have +[`display_secrets_in_show_and_select` server setting](../server-configuration-parameters/settings#display_secrets_in_show_and_select) +turned on and a +[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#grant-display-secrets) privilege. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + ## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} Enables or disables skipping insertion of extra data. 
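Tying together the `display_secrets_in_show_and_select` server setting and the `format_display_secrets_in_show_and_select` format setting documented above, here is a minimal sketch of how a definition containing credentials can be inspected. The table name `mysql_source_table` is a placeholder, and the query assumes the server setting and the `displaySecretsInShowAndSelect` privilege described above are already in place:

``` sql
-- Without the format setting (or the server setting and privilege), secrets are masked as '[HIDDEN]'.
SELECT create_table_query
FROM system.tables
WHERE name = 'mysql_source_table'   -- placeholder: a table whose definition embeds credentials
SETTINGS format_display_secrets_in_show_and_select = 1;
```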
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 66a24ac3fca..c6fdcf317c3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -890,7 +890,7 @@ Write time that processor spent during execution/waiting for data to `system.pro See also: -- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log) +- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md) - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) ## max_insert_block_size {#settings-max_insert_block_size} diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 4b1e75c25a1..deb9a0aaeb3 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -20,6 +20,9 @@ Columns: - `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach replica. - `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing replica when establishing a connection with hedged requests. - `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. +- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database). +- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database). +- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown". **Example** @@ -47,6 +50,9 @@ default_database: errors_count: 0 slowdowns_count: 0 estimated_recovery_time: 0 +database_shard_name: +database_replica_name: +is_active: NULL Row 2: ────── @@ -63,6 +69,9 @@ default_database: errors_count: 0 slowdowns_count: 0 estimated_recovery_time: 0 +database_shard_name: +database_replica_name: +is_active: NULL ``` **See Also** diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index e849525e495..a6ff15642a1 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -1,4 +1,4 @@ -# system.processors_profile_log {#system-processors_profile_log} +# processors_profile_log This table contains profiling on processors level (that you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)). 
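Since `processors_profile_log` comes up in both hunks above, a rough sketch of how the table is usually queried may help. The `query_id` below is a placeholder, and rows only appear when processor-level profiling was enabled for the query:

``` sql
-- Per-processor timings for a single query, slowest processors first.
SELECT
    name,
    elapsed_us,
    input_wait_elapsed_us,
    output_wait_elapsed_us
FROM system.processors_profile_log
WHERE event_date = today() AND query_id = 'your-query-id'   -- placeholder query_id
ORDER BY elapsed_us DESC
LIMIT 10;
```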
@@ -73,4 +73,4 @@ Here you can see: **See Also** -- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) \ No newline at end of file +- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index a90fa01a45d..58cdb82d31f 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -12,7 +12,7 @@ Columns: - `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://en.wikipedia.org/wiki/SHA-2)-encoded password, with [double SHA-1](https://en.wikipedia.org/wiki/SHA-1)-encoded password or with [bcrypt](https://en.wikipedia.org/wiki/Bcrypt)-encoded password. - `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index f64d8337387..a7ecadf19fa 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -41,9 +41,9 @@ If the file is sitting on the same machine as `clickhouse-local`, use the `file` ``` ClickHouse knows the file uses a tab-separated format from filename extension. If you need to explicitly specify the format, simply add one of the [many ClickHouse input formats](../../interfaces/formats.md): - ```bash - ./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')" - ``` +```bash +./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')" +``` The `file` table function creates a table, and you can use `DESCRIBE` to see the inferred schema: diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md new file mode 100644 index 00000000000..3da9645181e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -0,0 +1,118 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest +sidebar_position: 300 +sidebar_label: kolmogorovSmirnovTest +--- + +# kolmogorovSmirnovTest + +Applies Kolmogorov-Smirnov's test to samples from two populations. + +**Syntax** + +``` sql +kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. 
If `sample_index` equals 0, then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +Samples must belong to continuous, one-dimensional probability distributions. + +**Arguments** + +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + Let F(x) and G(x) be the CDFs of the first and second distributions respectively. + - `'two-sided'` + The null hypothesis is that samples come from the same distribution, i.e. F(x) = G(x) for all x, + and the alternative is that the distributions are not identical. + - `'greater'` + The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one, + i.e. the CDF of the first distribution lies above and hence to the left of that of the second one. + In other words, F(x) >= G(x) for all x, and the alternative in this case is that F(x) < G(x) for at least one x. + - `'less'` + The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one, + i.e. the CDF of the first distribution lies below and hence to the right of that of the second one. + In other words, F(x) <= G(x) for all x, and the alternative in this case is that F(x) > G(x) for at least one x. +- `computation_method` — the method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md). + - `'exact'` - the calculation is performed using the precise probability distribution of the test statistic. Computationally intensive and wasteful except for small samples. + - `'asymp'` (`'asymptotic'`) - the calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar. + - `'auto'` - the `'exact'` method is used when the maximum number of samples is less than 10'000. + + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Query: + +``` sql +SELECT kolmogorovSmirnovTest('less', 'exact')(value, num) +FROM +( + SELECT + randNormal(0, 10) AS value, + 0 AS num + FROM numbers(10000) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(10000) +) +``` + +Result: + +``` text +┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐ +│ (0.009899999999999996,0.37528595205132287)        │ +└────────────────────────────────────────────────────┘ +``` + +Note: +The p-value is bigger than 0.05 (for a confidence level of 95%), so the null hypothesis is not rejected. 
+ + +Query: + +``` sql +SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num) +FROM +( + SELECT + randStudentT(10) AS value, + 0 AS num + FROM numbers(100) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(100) +) +``` + +Result: + +``` text +┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐ +│ (0.4100000000000002,6.61735760482795e-8) │ +└─────────────────────────────────────────────────────────┘ +``` + +Note: +P-value is less than 0.05 (for confidence level of 95%), so null hypothesis is rejected. + + +**See Also** + +- [Kolmogorov-Smirnov'test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test) diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index c61a3069db6..2ad8ac4bb23 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -27,7 +27,7 @@ ClickHouse data types include: - **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results - **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell - **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type. -- **Nullable**: [`Nullbale`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type) +- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type) - **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses - **Geo types**: for[ geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` - **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) \ No newline at end of file diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 230b4af7960..28180f7f991 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -8,7 +8,7 @@ sidebar_label: Nullable Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`. -For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. +For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`. A `Nullable` type field can’t be included in table indexes. 
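To make the rule above concrete, here is a small illustrative sketch (the table name `nullable_demo` is made up for the example): the elements of a composite type may be `Nullable`, but the composite type itself cannot be wrapped in `Nullable`.

``` sql
-- Allowed: the elements of the array are Nullable, the array itself is not.
CREATE TABLE nullable_demo
(
    `values` Array(Nullable(Int8))
)
ENGINE = MergeTree
ORDER BY tuple();

-- NULL can be stored inside the array.
INSERT INTO nullable_demo VALUES ([1, NULL, 3]);

-- Not allowed: composite types such as Array, Map and Tuple cannot be wrapped in Nullable,
-- so a column defined as `Nullable(Array(Int8))` is rejected by the server.
```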
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 48a8ce45d33..189673cdae7 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -1658,6 +1658,7 @@ Example of settings: test dictionary_source + ssl=true ``` @@ -1672,6 +1673,7 @@ SOURCE(MONGODB( password '' db 'test' collection 'dictionary_source' + options 'ssl=true' )) ``` @@ -1683,6 +1685,8 @@ Setting fields: - `password` – Password of the MongoDB user. - `db` – Name of the database. - `collection` – Name of the collection. +- `options` - MongoDB connection string options (optional parameter). + ### Redis diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c22b46a7eea..15644b54c2b 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -24,6 +24,90 @@ SELECT └─────────────────────┴────────────┴────────────┴─────────────────────┘ ``` +## makeDate + +Creates a [Date](../../sql-reference/data-types/date.md) from a year, month and day argument. + +**Syntax** + +``` sql +makeDate(year, month, day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Returned value** + +- A date created from the arguments. + +Type: [Date](../../sql-reference/data-types/date.md). + +**Example** + +``` sql +SELECT makeDate(2023, 2, 28) AS Date; +``` + +Result: + +``` text +┌───────date─┐ +│ 2023-02-28 │ +└────────────┘ +``` + +## makeDate32 + +Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). + +## makeDateTime + +Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. + +**Syntax** + +``` sql +makeDateTime(year, month, day, hour, minute, second[, timezone]) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). 
+- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). + +**Returned value** + +- A date with time created from the arguments. + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +``` sql +SELECT makeDateTime(2023, 2, 28, 17, 12, 33) AS DateTime; +``` + +Result: + +``` text +┌────────────DateTime─┐ +│ 2023-02-28 17:12:33 │ +└─────────────────────┘ +``` + +## makeDateTime64 + +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). + ## timeZone Returns the timezone of the server. diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 2fb20e4e462..52e99d93109 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -109,7 +109,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same storage policy. ## REPLACE PARTITION @@ -123,7 +123,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same storage policy. ## MOVE PARTITION TO TABLE @@ -137,7 +137,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same storage policy. - Both tables must be the same engine family (replicated or non-replicated). ## CLEAR COLUMN IN PARTITION diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 3548ef7cc07..d168be63c36 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -32,9 +32,12 @@ There are multiple ways of user identification: - `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH bcrypt_password BY 'qwerty'` +- `IDENTIFIED WITH bcrypt_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` - `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED BY 'qwerty'` ## Examples @@ -54,21 +57,12 @@ There are multiple ways of user identification: The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next... ::: -3. The best option is to use a password that is hashed using SHA-256. 
ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example: +3. The most common option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example: ```sql CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password' ``` - Notice ClickHouse generates and runs the following command for you: - - ```response - CREATE USER name3 - IDENTIFIED WITH sha256_hash - BY '8B3404953FCAA509540617F082DB13B3E0734F90FF6365C19300CC6A6EA818D6' - SALT 'D6489D8B5692D82FF944EA6415785A8A8A1AF33825456AFC554487725A74A609' - ``` - The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup: ```bash @@ -92,6 +86,34 @@ There are multiple ways of user identification: CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518' ``` +5. The `bcrypt_password` is the most secure option for storing passwords. It uses the [bcrypt](https://en.wikipedia.org/wiki/Bcrypt) algorithm, which is resilient against brute force attacks even if the password hash is compromised. + + ```sql + CREATE USER name5 IDENTIFIED WITH bcrypt_password BY 'my_password' + ``` + + The length of the password is limited to 72 characters with this method. The bcrypt work factor parameter, which defines the amount of computations and time needed to compute the hash and verify the password, can be modified in the server configuration: + + ```xml + 12 + ``` + + The work factor must be between 4 and 31, with a default value of 12. + +6. The type of the password can also be omitted: + + ```sql + CREATE USER name6 IDENTIFIED BY 'my_password' + ``` + + In this case, ClickHouse will use the default password type specified in the server configuration: + + ```xml + sha256_password + ``` + + The available password types are: `plaintext_password`, `sha256_password`, `double_sha1_password`. + ## User Host User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 8fa3b5de1b8..b6d6f285f3d 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -36,6 +36,18 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if is not specified it appends roles. +## Grant Current Grants Syntax +``` sql +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. + +Using the `CURRENT GRANTS` statement allows you to give all specified privileges to the given user or role. +If none of the privileges were specified, then the given user or role will receive all available privileges for `CURRENT_USER`. + ## Usage To use `GRANT`, your account must have the `GRANT OPTION` privilege. 
You can grant privileges only inside the scope of your account privileges. @@ -188,6 +200,7 @@ Hierarchy of privileges: - `HDFS` - `S3` - [dictGet](#grant-dictget) +- [displaySecretsInShowAndSelect](#grant-display-secrets) Examples of how this hierarchy is treated: @@ -473,6 +486,15 @@ Privilege level: `DICTIONARY`. - `GRANT dictGet ON mydb.mydictionary TO john` - `GRANT dictGet ON mydictionary TO john` + +### displaySecretsInShowAndSelect {#grant-display-secrets} + +Allows a user to view secrets in `SHOW` and `SELECT` queries if both +[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) +and +[`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) +are turned on. + ### ALL Grants all the privileges on regulated entity to a user account or a role. diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 428a04ae030..ed3f8a074c8 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -6,6 +6,13 @@ sidebar_label: SHOW # SHOW Statements +N.B. `SHOW CREATE (TABLE|DATABASE|USER)` hides secrets unless +[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) +is turned on, +[`format_display_secrets_in_show_and_select` format setting](../../operations/settings/formats#format_display_secrets_in_show_and_select) +is turned on and user has +[`displaySecretsInShowAndSelect`](grant.md#grant-display-secrets) privilege. + ## SHOW CREATE TABLE | DICTIONARY | VIEW | DATABASE ``` sql @@ -293,8 +300,6 @@ If user is not specified, the query returns privileges for the current user. Shows parameters that were used at a [user creation](../../sql-reference/statements/create/user.md). -`SHOW CREATE USER` does not output user passwords. - **Syntax** ``` sql diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 5a5a771f239..c5596b7ba5f 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -76,7 +76,7 @@ Resets the mark cache. ## DROP REPLICA -Dead replicas can be dropped using following syntax: +Dead replicas of `ReplicatedMergeTree` tables can be dropped using following syntax: ``` sql SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table; @@ -85,13 +85,25 @@ SYSTEM DROP REPLICA 'replica_name'; SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk'; ``` -Queries will remove the replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. +Queries will remove the `ReplicatedMergeTree` replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk. 
The first one removes metadata of `'replica_name'` replica of `database.table` table. The second one does the same for all replicated tables in the database. The third one does the same for all replicated tables on the local server. The fourth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation. +## DROP DATABASE REPLICA + +Dead replicas of `Replicated` databases can be dropped using following syntax: + +``` sql +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM DATABASE database; +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name']; +SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM ZKPATH '/path/to/table/in/zk'; +``` + +Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica path from ZooKeeper when there's no database to run `DROP DATABASE`. Please note that it does not remove `ReplicatedMergeTree` replicas (so you may need `SYSTEM DROP REPLICA` as well). Shard and replica names are the names that were specified in `Replicated` engine arguments when creating the database. Also, these names can be obtained from `database_shard_name` and `database_replica_name` columns in `system.clusters`. If the `FROM SHARD` clause is missing, then `replica_name` must be a full replica name in `shard_name|replica_name` format. + ## DROP UNCOMPRESSED CACHE Reset the uncompressed data cache. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md new file mode 100644 index 00000000000..2f8c6bb6760 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -0,0 +1,117 @@ +--- +slug: /ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest +sidebar_position: 300 +sidebar_label: kolmogorovSmirnovTest +--- + +# kolmogorovSmirnovTest {#kolmogorovSmirnovTest} + +Проводит статистический тест Колмогорова-Смирнова для двух независимых выборок. + +**Синтаксис** + +``` sql +kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Выборки должны принадлежать непрерывным одномерным распределениям. + +**Аргументы** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Параметры** + +- `alternative` — альтернативная гипотеза (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + Пусть F(x) и G(x) - функции распределения первой и второй выборки соотвественно. + - `'two-sided'` + Нулевая гипотеза состоит в том, что выборки происходит из одного и того же распределение, то есть F(x) = G(x) для любого x. + Альтернатива - выборки принадлежат разным распределениям. 
+ - `'greater'` + Нулевая гипотеза состоит в том, что элементы первой выборки в асимптотически почти наверное меньше элементов из второй выборки, + то есть функция распределения первой выборки лежит выше и соотвественно левее, чем функция распределения второй выборки. + Таким образом это означает, что F(x) >= G(x) for любого x, а альтернатива в этом случае состоит в том, что F(x) < G(x) хотя бы для одного x. + - `'less'`. + Нулевая гипотеза состоит в том, что элементы первой выборки в асимптотически почти наверное больше элементов из второй выборки, + то есть функция распределения первой выборки лежит ниже и соотвественно правее, чем функция распределения второй выборки. + Таким образом это означает, что F(x) <= G(x) for любого x, а альтернатива в этом случае состоит в том, что F(x) > G(x) хотя бы для одного x. +- `computation_method` — метод, используемый для вычисления p-value. (Необязательный параметр, по умолчанию: `'auto'`.) [String](../../../sql-reference/data-types/string.md). + - `'exact'` - вычисление производится с помощью вычисления точного распределения статистики. Требует большого количества вычислительных ресурсов и расточительно для больших выборок. + - `'asymp'`(`'asymptotic'`) - используется приближенное вычисление. Для больших выборок приближенный результат и точный почти идентичны. + - `'auto'` - значение вычисляется точно (с помощью метода `'exact'`), если максимальный размер двух выборок не превышает 10'000. + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное статистики. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Запрос: + +``` sql +SELECT kolmogorovSmirnovTest('less', 'exact')(value, num) +FROM +( + SELECT + randNormal(0, 10) AS value, + 0 AS num + FROM numbers(10000) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(10000) +) +``` + +Результат: + +``` text +┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐ +│ (0.009899999999999996,0.37528595205132287) │ +└────────────────────────────────────────────────────┘ +``` + +Заметки: +P-value больше чем 0.05 (для уровня значимости 95%), то есть нулевая гипотеза не отвергается. + + +Запрос: + +``` sql +SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num) +FROM +( + SELECT + randStudentT(10) AS value, + 0 AS num + FROM numbers(100) + UNION ALL + SELECT + randNormal(0, 10) AS value, + 1 AS num + FROM numbers(100) +) +``` + +Результат: + +``` text +┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐ +│ (0.4100000000000002,6.61735760482795e-8) │ +└─────────────────────────────────────────────────────────┘ +``` + +Заметки: +P-value меньше чем 0.05 (для уровня значимости 95%), то есть нулевая гипотеза отвергается. + + +**Смотрите также** + +- [Критерий согласия Колмогорова-Смирнова](https://ru.wikipedia.org/wiki/%D0%9A%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D1%81%D0%BE%D0%B3%D0%BB%D0%B0%D1%81%D0%B8%D1%8F_%D0%9A%D0%BE%D0%BB%D0%BC%D0%BE%D0%B3%D0%BE%D1%80%D0%BE%D0%B2%D0%B0) diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md index 73c63850750..9b8fafabfcc 100644 --- a/docs/ru/sql-reference/statements/grant.md +++ b/docs/ru/sql-reference/statements/grant.md @@ -37,6 +37,19 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US `WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли. 
`WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые новые роли. +## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax} + +```sql +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Тип привилегии +- `role` — Роль пользователя ClickHouse. +- `user` — Пользователь ClickHouse. + +Использование выражения `CURRENT GRANTS` позволяет присвоить все указанные и доступные для присвоения привилегии. +Если список привелегий не задан, то указанный пользователь или роль получат все доступные привилегии для `CURRENT_USER`. + ## Использование {#grant-usage} Для использования `GRANT` пользователь должен иметь привилегию `GRANT OPTION`. Пользователь может выдавать привилегии только внутри области действий назначенных ему самому привилегий. diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go index b93c4fc3350..938c34281f1 100644 --- a/programs/diagnostics/internal/platform/data/file_test.go +++ b/programs/diagnostics/internal/platform/data/file_test.go @@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.xml": int64(2017), "default-password.xml": int64(188), - "config.xml": int64(61260), + "config.xml": int64(61662), "server-include.xml": int64(168), "user-include.xml": int64(559), } diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index 18997855955..21a0821f89d 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -1260,8 +1260,12 @@ REPLACE_ME REPLACE_ME +
<header>Authorization: Bearer SOME-TOKEN</header>
your_base64_encoded_customer_key + REPLACE_ME + REPLACE_ME + true http://proxy1 http://proxy2 diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5768e744f94..8925f50fe97 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -26,12 +26,13 @@ #include #include #include +#include #include #include #include #include #include -#include +#include #include #include #include @@ -133,6 +134,11 @@ void LocalServer::initialize(Poco::Util::Application & self) config().getUInt("max_io_thread_pool_size", 100), config().getUInt("max_io_thread_pool_free_size", 0), config().getUInt("io_thread_pool_queue_size", 10000)); + + OutdatedPartsLoadingThreadPool::initialize( + config().getUInt("max_outdated_parts_loading_thread_pool_size", 16), + 0, // We don't need any threads one all the parts will be loaded + config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000)); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8c0d50bae55..bbd536d9300 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -41,10 +41,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include @@ -778,6 +777,11 @@ try server_settings.max_backups_io_thread_pool_free_size, server_settings.backups_io_thread_pool_queue_size); + OutdatedPartsLoadingThreadPool::initialize( + server_settings.max_outdated_parts_loading_thread_pool_size, + 0, // We don't need any threads one all the parts will be loaded + server_settings.outdated_part_loading_thread_pool_queue_size); + /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) { @@ -1852,7 +1856,7 @@ try LOG_INFO(log, "Closed all listening sockets."); /// Killing remaining queries. - if (server_settings.shutdown_wait_unfinished_queries) + if (!server_settings.shutdown_wait_unfinished_queries) global_context->getProcessList().killAllQueries(); if (current_connections) diff --git a/programs/server/config.xml b/programs/server/config.xml index 7a75d7251a9..51aa04ba0e5 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -476,6 +476,14 @@ 1 1 + + sha256_password + + + 12 + - + \n" + f"This is an automated comment for commit {pr_info.sha} with " + f"description of existing statuses. It's updated for the latest CI running\n" + f"The full report is available [here]({report_url})\n" + f"{worst_state}\n\n" + "\n" + "" + ) + # group checks by the name to get the worst one per each + grouped_statuses = {} # type: Dict[CheckDescription, CommitStatuses] + for status in statuses: + cd = None + for c in CHECK_DESCRIPTIONS: + if c.match_func(status.context): + cd = c + break + + if cd is None or cd == CHECK_DESCRIPTIONS[-1]: + # This is the case for either non-found description or a fallback + cd = CheckDescription( + status.context, + CHECK_DESCRIPTIONS[-1].description, + CHECK_DESCRIPTIONS[-1].match_func, + ) + + if cd in grouped_statuses: + grouped_statuses[cd].append(status) + else: + grouped_statuses[cd] = [status] + + table_rows = [] # type: List[str] + for desc, gs in grouped_statuses.items(): + table_rows.append( + f"" + f"\n" + ) + + table_rows.sort() + + comment_footer = "
</table>
" + return "".join([comment_body, *table_rows, comment_footer]) + + +def get_worst_state(statuses: CommitStatuses) -> str: + worst_status = None + states = {"error": 0, "failure": 1, "pending": 2, "success": 3} + for status in statuses: + if worst_status is None: + worst_status = status + continue + if states[status.state] < states[worst_status.state]: + worst_status = status + if worst_status.state == "error": + break + + if worst_status is None: + return "" + return worst_status.state + + +def create_ci_report(pr_info: PRInfo, statuses: CommitStatuses) -> str: + """The function converst the statuses to TestResults and uploads the report + to S3 tests bucket. Then it returns the URL""" + test_results = [] # type: TestResults + for status in statuses: + log_urls = None + if status.target_url is not None: + log_urls = [status.target_url] + test_results.append(TestResult(status.context, status.state, log_urls=log_urls)) + return upload_results( + S3Helper(), pr_info.number, pr_info.sha, test_results, [], CI_STATUS_NAME + ) def post_commit_status_to_file( @@ -90,8 +290,16 @@ def get_commit_filtered_statuses(commit: Commit) -> CommitStatuses: return list(filtered.values()) +def get_repo(gh: Github) -> Repository: + global GH_REPO + if GH_REPO is not None: + return GH_REPO + GH_REPO = gh.get_repo(GITHUB_REPOSITORY) + return GH_REPO + + def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: - repo = gh.get_repo(GITHUB_REPOSITORY) + repo = get_repo(gh) pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.remove_from_labels(label) @@ -99,7 +307,7 @@ def remove_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: - repo = gh.get_repo(GITHUB_REPOSITORY) + repo = get_repo(gh) pull_request = repo.get_pull(pr_info.number) for label in labels_names: pull_request.add_to_labels(label) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 432e9ec7c01..04203617dca 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -16,13 +16,12 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status +from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_pull_helper import get_images_with_versions from env_helper import TEMP_PATH, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -150,8 +149,9 @@ def main(): pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, args.check_name) + rerun_helper = RerunHelper(commit, args.check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -242,7 +242,7 @@ def main(): args.check_name, ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) + post_commit_status(commit, state, report_url, description, args.check_name, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/docker_images_check.py 
b/tests/ci/docker_images_check.py index f2b1105b3b0..16a58a90dcf 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, post_commit_status +from commit_status_helper import format_description, get_commit, post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -474,7 +474,8 @@ def main(): return gh = Github(get_best_robot_token(), per_page=100) - post_commit_status(gh, pr_info.sha, NAME, description, status, url) + commit = get_commit(gh, pr_info.sha) + post_commit_status(commit, status, url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 0484ea8f641..d89708b9277 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -10,7 +10,7 @@ from typing import List, Dict, Tuple from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, post_commit_status +from commit_status_helper import format_description, get_commit, post_commit_status from env_helper import RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -221,7 +221,8 @@ def main(): description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) - post_commit_status(gh, pr_info.sha, NAME, description, status, url) + commit = get_commit(gh, pr_info.sha) + post_commit_status(commit, status, url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index c6854c5aa78..a434d3cc841 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -15,7 +15,7 @@ from github import Github from build_check import get_release_or_pr from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, post_commit_status +from commit_status_helper import format_description, get_commit, post_commit_status from docker_images_check import DockerImage from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -372,7 +372,8 @@ def main(): description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) - post_commit_status(gh, pr_info.sha, NAME, description, status, url) + commit = get_commit(gh, pr_info.sha) + post_commit_status(commit, status, url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index ed2743ca965..e3930a20bd9 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -9,13 +9,18 @@ import sys from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status, get_commit, update_mergeable_check +from commit_status_helper import ( + NotSet, + 
RerunHelper, + get_commit, + post_commit_status, + update_mergeable_check, +) from docker_pull_helper import get_image_with_version from env_helper import TEMP_PATH, REPO_COPY from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -52,8 +57,9 @@ def main(): pr_info = PRInfo(need_changed_files=True) gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, NAME) + rerun_helper = RerunHelper(commit, NAME) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -61,9 +67,8 @@ def main(): if not pr_info.has_changes_in_documentation() and not args.force: logging.info("No changes in documentation") - commit = get_commit(gh, pr_info.sha) - commit.create_status( - context=NAME, description="No changes in docs", state="success" + post_commit_status( + commit, "success", NotSet, "No changes in docs", NAME, pr_info ) sys.exit(0) @@ -132,7 +137,7 @@ def main(): s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, NAME, description, status, report_url) + post_commit_status(commit, status, report_url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index a5a4913be0b..5c2139ae0bc 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -1,7 +1,7 @@ import os from os import path as p -from build_download_helper import get_with_retries +from build_download_helper import get_gh_api module_dir = p.abspath(p.dirname(__file__)) git_root = p.abspath(p.join(module_dir, "..", "..")) @@ -46,7 +46,7 @@ def GITHUB_JOB_ID() -> str: jobs = [] page = 1 while not _GITHUB_JOB_ID: - response = get_with_retries( + response = get_gh_api( f"https://api.github.com/repos/{GITHUB_REPOSITORY}/" f"actions/runs/{GITHUB_RUN_ID}/jobs?per_page=100&page={page}" ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index f13b4099657..89066ade2cb 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -17,6 +17,8 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( + RerunHelper, + get_commit, post_commit_status, update_mergeable_check, ) @@ -25,7 +27,6 @@ from env_helper import S3_BUILDS_BUCKET, TEMP_PATH from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo from report import TestResults, read_test_results -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -106,10 +107,11 @@ def main(): pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, gh, pr_info, NAME) - rerun_helper = RerunHelper(gh, pr_info, NAME) + rerun_helper = RerunHelper(commit, NAME) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") status = rerun_helper.get_finished_status() @@ -197,7 +199,7 @@ def main(): NAME, ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, NAME, 
description, state, report_url) + post_commit_status(commit, state, report_url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index ea2f5eb3136..aa8a0cf9553 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -2,32 +2,42 @@ import logging from github import Github -from env_helper import GITHUB_RUN_URL -from pr_info import PRInfo +from commit_status_helper import ( + CI_STATUS_NAME, + NotSet, + get_commit, + get_commit_filtered_statuses, + post_commit_status, +) from get_robot_token import get_best_robot_token -from commit_status_helper import get_commit, get_commit_filtered_statuses - -NAME = "Run Check" +from pr_info import PRInfo -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) pr_info = PRInfo(need_orgs=True) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - url = GITHUB_RUN_URL - statuses = get_commit_filtered_statuses(commit) - pending_status = any( # find NAME status in pending state - True - for status in statuses - if status.context == NAME and status.state == "pending" - ) - if pending_status: - commit.create_status( - context=NAME, - description="All checks finished", - state="success", - target_url=url, + statuses = [ + status + for status in get_commit_filtered_statuses(commit) + if status.context == CI_STATUS_NAME + ] + if not statuses: + return + status = statuses[0] + if status.state == "pending": + post_commit_status( + commit, + "success", + status.target_url or NotSet, + "All checks finished", + CI_STATUS_NAME, + pr_info, ) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 8e55c084f21..037bb13f1f8 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,9 +20,11 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - post_commit_status, + NotSet, + RerunHelper, get_commit, override_status, + post_commit_status, post_commit_status_to_file, update_mergeable_check, ) @@ -32,7 +34,6 @@ from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo from report import TestResults, read_test_results -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -53,6 +54,8 @@ def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): result.append("USE_PARALLEL_REPLICAS=1") if "s3 storage" in check_name: result.append("USE_S3_STORAGE_FOR_MERGE_TREE=1") + if "analyzer" in check_name: + result.append("USE_NEW_ANALYZER=1") if run_by_hash_total != 0: result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") @@ -71,6 +74,7 @@ def get_image_name(check_name): def get_run_command( + check_name, builds_path, repo_tests_path, result_path, @@ -103,10 +107,16 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) + volume_with_broken_test = ( + f"--volume={repo_tests_path}/broken_tests.txt:/broken_tests.txt" + if "analyzer" in check_name + else "" + ) return ( f"docker run --volume={builds_path}:/package_folder " f"--volume={repo_tests_path}:/usr/share/clickhouse-test " + f"{volume_with_broken_test} " f"--volume={result_path}:/test_output --volume={server_log_path}:/var/log/clickhouse-server " f"--cap-add=SYS_PTRACE 
{env_str} {additional_options_str} {image}" ) @@ -238,6 +248,7 @@ def main(): need_changed_files=run_changed_tests, pr_event_from_api=validate_bugfix_check ) + commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, gh, pr_info, check_name) if not os.path.exists(temp_path): @@ -265,7 +276,7 @@ def main(): run_by_hash_total = 0 check_name_with_group = check_name - rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -274,13 +285,15 @@ def main(): if run_changed_tests: tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: - commit = get_commit(gh, pr_info.sha) state = override_status("success", check_name, validate_bugfix_check) if args.post_commit_status == "commit_status": - commit.create_status( - context=check_name_with_group, - description=NO_CHANGES_MSG, - state=state, + post_commit_status( + commit, + state, + NotSet, + NO_CHANGES_MSG, + check_name_with_group, + pr_info, ) elif args.post_commit_status == "file": post_commit_status_to_file( @@ -322,6 +335,7 @@ def main(): additional_envs.append("GLOBAL_TAGS=no-random-settings") run_command = get_run_command( + check_name, packages_path, repo_tests_path, result_path, @@ -366,16 +380,16 @@ def main(): if args.post_commit_status == "commit_status": if "parallelreplicas" in check_name.lower(): post_commit_status( - gh, - pr_info.sha, - check_name_with_group, - description, + commit, "success", report_url, + description, + check_name_with_group, + pr_info, ) else: post_commit_status( - gh, pr_info.sha, check_name_with_group, description, state, report_url + commit, state, report_url, description, check_name_with_group, pr_info ) elif args.post_commit_status == "file": if "parallelreplicas" in check_name.lower(): diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 6ecaf468ed1..b41eba49cc3 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import logging from dataclasses import dataclass +from typing import Optional import boto3 # type: ignore from github import Github @@ -20,7 +21,13 @@ def get_parameter_from_ssm(name, decrypt=True, client=None): return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] +ROBOT_TOKEN = None # type: Optional[Token] + + def get_best_robot_token(token_prefix_env_name="github_robot_token_"): + global ROBOT_TOKEN + if ROBOT_TOKEN is not None: + return ROBOT_TOKEN.value client = boto3.client("ssm", region_name="us-east-1") parameters = client.describe_parameters( ParameterFilters=[ @@ -28,7 +35,6 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"): ] )["Parameters"] assert parameters - token = None for token_name in [p["Name"] for p in parameters]: value = get_parameter_from_ssm(token_name, True, client) @@ -38,15 +44,17 @@ def get_best_robot_token(token_prefix_env_name="github_robot_token_"): user = gh.get_user() rest, _ = gh.rate_limiting logging.info("Get token with %s remaining requests", rest) - if token is None: - token = Token(user, value, rest) + if ROBOT_TOKEN is None: + ROBOT_TOKEN = Token(user, value, rest) continue - if token.rest < rest: - token.user, token.value, token.rest = user, value, rest + if ROBOT_TOKEN.rest < rest: + ROBOT_TOKEN.user, ROBOT_TOKEN.value, ROBOT_TOKEN.rest = user, value, rest - assert token + assert 
ROBOT_TOKEN logging.info( - "User %s with %s remaining requests is used", token.user.login, token.rest + "User %s with %s remaining requests is used", + ROBOT_TOKEN.user.login, + ROBOT_TOKEN.rest, ) - return token.value + return ROBOT_TOKEN.value diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 54245670b26..d619ce96cee 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -19,7 +19,9 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( + RerunHelper, format_description, + get_commit, post_commit_status, update_mergeable_check, ) @@ -29,7 +31,6 @@ from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -268,9 +269,10 @@ def main(): if CI: gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, gh, pr_info, args.check_name) - rerun_helper = RerunHelper(gh, pr_info, args.check_name) + rerun_helper = RerunHelper(commit, args.check_name) if rerun_helper.is_already_finished_by_status(): logging.info( "Check is already finished according to github status, exiting" @@ -347,7 +349,7 @@ def main(): description = format_description(description) - post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) + post_commit_status(commit, state, report_url, description, args.check_name, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index f864751e830..8ef6244a1c5 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -19,8 +19,10 @@ from clickhouse_helper import ( prepare_tests_results_for_clickhouse, ) from commit_status_helper import ( - post_commit_status, + RerunHelper, + get_commit, override_status, + post_commit_status, post_commit_status_to_file, ) from docker_pull_helper import get_images_with_versions @@ -29,7 +31,6 @@ from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, read_test_results -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -198,8 +199,9 @@ def main(): sys.exit(0) gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -284,15 +286,10 @@ def main(): print(f"::notice:: {check_name} Report url: {report_url}") if args.post_commit_status == "commit_status": post_commit_status( - gh, pr_info.sha, check_name_with_group, description, state, report_url + commit, state, report_url, description, check_name_with_group, pr_info ) elif args.post_commit_status == "file": - post_commit_status_to_file( - post_commit_path, - description, - state, - report_url, - ) + post_commit_status_to_file(post_commit_path, description, state, report_url) else: raise Exception( f'Unknown post_commit_status option 
"{args.post_commit_status}"' diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index ffa9e45373f..9d35d2d6e35 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -13,13 +13,12 @@ from github import Github from build_download_helper import get_build_name_for_check from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import RerunHelper, get_commit, post_commit_status from compress_files import compress_fast from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from ssh import SSHKey from stopwatch import Stopwatch @@ -181,10 +180,11 @@ if __name__ == "__main__": sys.exit(0) gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) check_name = KEEPER_CHECK_NAME if args.program == "keeper" else SERVER_CHECK_NAME - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -293,7 +293,7 @@ if __name__ == "__main__": ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) + post_commit_status(commit, status, report_url, description, check_name, pr_info) ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index b103dd053bb..4501d40e4d3 100755 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -4,7 +4,7 @@ import argparse import logging import os -from commit_status_helper import get_commit +from commit_status_helper import NotSet, get_commit, post_commit_status from env_helper import GITHUB_JOB_URL from get_robot_token import get_best_robot_token from github_helper import GitHub @@ -34,6 +34,7 @@ def main(): args = parser.parse_args() url = "" description = "the release can be created from the commit, manually set" + pr_info = None if not args.commit: pr_info = PRInfo() if pr_info.event == pr_info.default_event: @@ -45,14 +46,10 @@ def main(): gh = GitHub(args.token, create_cache_dir=False) # Get the rate limits for a quick fail - gh.get_rate_limit() commit = get_commit(gh, args.commit) - - commit.create_status( - context=RELEASE_READY_STATUS, - description=description, - state="success", - target_url=url, + gh.get_rate_limit() + post_commit_status( + commit, "success", url or NotSet, description, RELEASE_READY_STATUS, pr_info ) diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index d0c84d56496..bf5704f31bd 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -12,13 +12,12 @@ from typing import Dict from github import Github -from commit_status_helper import get_commit, post_commit_status +from commit_status_helper import RerunHelper, get_commit, post_commit_status from ci_config import CI_CONFIG from docker_pull_helper import get_image_with_version from env_helper import GITHUB_EVENT_PATH, GITHUB_RUN_URL, S3_BUILDS_BUCKET, S3_DOWNLOAD from get_robot_token import get_best_robot_token, 
get_parameter_from_ssm from pr_info import PRInfo -from rerun_helper import RerunHelper from s3_helper import S3Helper from tee_popen import TeePopen @@ -118,7 +117,7 @@ if __name__ == "__main__": message = "Skipped, not labeled with 'pr-performance'" report_url = GITHUB_RUN_URL post_commit_status( - gh, pr_info.sha, check_name_with_group, message, status, report_url + commit, status, report_url, message, check_name_with_group, pr_info ) sys.exit(0) @@ -131,7 +130,7 @@ if __name__ == "__main__": "Fill fliter our performance tests by grep -v %s", test_grep_exclude_filter ) - rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + rerun_helper = RerunHelper(commit, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -142,6 +141,7 @@ if __name__ == "__main__": .replace("(", "_") .replace(")", "_") .replace(",", "_") + .replace("/", "_") ) docker_image = get_image_with_version(reports_path, IMAGE_NAME) @@ -266,7 +266,7 @@ if __name__ == "__main__": report_url = uploaded["report.html"] post_commit_status( - gh, pr_info.sha, check_name_with_group, message, status, report_url + commit, status, report_url, message, check_name_with_group, pr_info ) if status == "error": diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ddeb070b2b9..86d4985c6b2 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -6,7 +6,7 @@ from typing import Dict, List, Set, Union from unidiff import PatchSet # type: ignore -from build_download_helper import get_with_retries +from build_download_helper import get_gh_api from env_helper import ( GITHUB_REPOSITORY, GITHUB_SERVER_URL, @@ -45,7 +45,7 @@ def get_pr_for_commit(sha, ref): f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{sha}/pulls" ) try: - response = get_with_retries(try_get_pr_url, sleep=RETRY_SLEEP) + response = get_gh_api(try_get_pr_url, sleep=RETRY_SLEEP) data = response.json() our_prs = [] # type: List[Dict] if len(data) > 1: @@ -105,7 +105,7 @@ class PRInfo: # workflow completed event, used for PRs only if "action" in github_event and github_event["action"] == "completed": self.sha = github_event["workflow_run"]["head_sha"] - prs_for_sha = get_with_retries( + prs_for_sha = get_gh_api( f"https://api.github.com/repos/{GITHUB_REPOSITORY}/commits/{self.sha}" "/pulls", sleep=RETRY_SLEEP, @@ -117,7 +117,7 @@ class PRInfo: self.number = github_event["pull_request"]["number"] if pr_event_from_api: try: - response = get_with_retries( + response = get_gh_api( f"https://api.github.com/repos/{GITHUB_REPOSITORY}" f"/pulls/{self.number}", sleep=RETRY_SLEEP, @@ -159,7 +159,7 @@ class PRInfo: self.user_login = github_event["pull_request"]["user"]["login"] self.user_orgs = set([]) if need_orgs: - user_orgs_response = get_with_retries( + user_orgs_response = get_gh_api( github_event["pull_request"]["user"]["organizations_url"], sleep=RETRY_SLEEP, ) @@ -255,7 +255,7 @@ class PRInfo: raise TypeError("The event does not have diff URLs") for diff_url in self.diff_urls: - response = get_with_retries( + response = get_gh_api( diff_url, sleep=RETRY_SLEEP, ) diff --git a/tests/ci/report.py b/tests/ci/report.py index 15d8ff9010e..cdef8409e7e 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -262,17 +262,20 @@ class ReportColorTheme: ColorTheme = Tuple[str, str, str] -def _format_header(header, branch_name, branch_url=None): - result = " ".join([w.capitalize() for w in header.split(" ")]) +def _format_header( + header: str, 
branch_name: str, branch_url: Optional[str] = None +) -> str: + # Following line does not lower CI->Ci and SQLancer->Sqlancer. It only + # capitalizes the first letter and doesn't touch the rest of the word + result = " ".join([w[0].upper() + w[1:] for w in header.split(" ") if w]) result = result.replace("Clickhouse", "ClickHouse") result = result.replace("clickhouse", "ClickHouse") if "ClickHouse" not in result: - result = "ClickHouse " + result - result += " for " + result = f"ClickHouse {result}" if branch_url: - result += f'{branch_name}' + result = f'{result} for {branch_name}' else: - result += branch_name + result = f"{result} for {branch_name}" return result @@ -367,6 +370,7 @@ def create_test_html_report( colspan += 1 if test_result.log_urls is not None: + has_log_urls = True test_logs_html = "
".join( [_get_html_url(url) for url in test_result.log_urls] ) diff --git a/tests/ci/rerun_helper.py b/tests/ci/rerun_helper.py deleted file mode 100644 index fa73256d759..00000000000 --- a/tests/ci/rerun_helper.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -from typing import Optional - -from commit_status_helper import get_commit, get_commit_filtered_statuses -from github import Github -from github.CommitStatus import CommitStatus -from pr_info import PRInfo - - -# TODO: move it to commit_status_helper -class RerunHelper: - def __init__(self, gh: Github, pr_info: PRInfo, check_name: str): - self.gh = gh - self.pr_info = pr_info - self.check_name = check_name - commit = get_commit(gh, self.pr_info.sha) - if commit is None: - raise ValueError(f"unable to receive commit for {pr_info.sha}") - self.pygh_commit = commit - self.statuses = get_commit_filtered_statuses(commit) - - def is_already_finished_by_status(self) -> bool: - # currently we agree even for failed statuses - for status in self.statuses: - if self.check_name in status.context and status.state in ( - "success", - "failure", - ): - return True - return False - - def get_finished_status(self) -> Optional[CommitStatus]: - for status in self.statuses: - if self.check_name in status.context: - return status - return None diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 44e1e4132c8..9849f19a1e4 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -1,25 +1,28 @@ #!/usr/bin/env python3 import sys import logging -import re from typing import Tuple from github import Github from commit_status_helper import ( + CI_STATUS_NAME, + NotSet, + create_ci_report, format_description, get_commit, + post_commit_status, post_labels, remove_labels, set_mergeable_check, ) from docs_check import NAME as DOCS_NAME -from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token from pr_info import FORCE_TESTS_LABEL, PRInfo -from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS -NAME = "Run Check" +from cancel_and_rerun_workflow_lambda.app import CATEGORY_TO_LABEL, check_pr_description +from workflow_approve_rerun_lambda.app import TRUSTED_CONTRIBUTORS TRUSTED_ORG_IDS = { 54801242, # clickhouse @@ -31,40 +34,6 @@ DO_NOT_TEST_LABEL = "do not test" FEATURE_LABEL = "pr-feature" SUBMODULE_CHANGED_LABEL = "submodule changed" -# They are used in .github/PULL_REQUEST_TEMPLATE.md, keep comments there -# updated accordingly -# The following lists are append only, try to avoid editing them -# They atill could be cleaned out after the decent time though. 
-LABELS = { - "pr-backward-incompatible": ["Backward Incompatible Change"], - "pr-bugfix": [ - "Bug Fix", - "Bug Fix (user-visible misbehavior in an official stable release)", - "Bug Fix (user-visible misbehaviour in official stable or prestable release)", - "Bug Fix (user-visible misbehavior in official stable or prestable release)", - ], - "pr-build": [ - "Build/Testing/Packaging Improvement", - "Build Improvement", - "Build/Testing Improvement", - "Build", - "Packaging Improvement", - ], - "pr-documentation": [ - "Documentation (changelog entry is not required)", - "Documentation", - ], - "pr-feature": ["New Feature"], - "pr-improvement": ["Improvement"], - "pr-not-for-changelog": [ - "Not for changelog (changelog entry is not required)", - "Not for changelog", - ], - "pr-performance": ["Performance Improvement"], -} - -CATEGORY_TO_LABEL = {c: lb for lb, categories in LABELS.items() for c in categories} - def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): if pr_user_login.lower() in TRUSTED_CONTRIBUTORS: @@ -89,7 +58,7 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): # Returns whether we should look into individual checks for this PR. If not, it # can be skipped entirely. # Returns can_run, description, labels_state -def should_run_checks_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: +def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: # Consider the labels and whether the user is trusted. print("Got labels", pr_info.labels) if FORCE_TESTS_LABEL in pr_info.labels: @@ -118,92 +87,7 @@ def should_run_checks_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]: return True, "No special conditions apply", "pending" -def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]: - lines = list( - map(lambda x: x.strip(), pr_info.body.split("\n") if pr_info.body else []) - ) - lines = [re.sub(r"\s+", " ", line) for line in lines] - - # Check if body contains "Reverts ClickHouse/ClickHouse#36337" - if [ - True - for line in lines - if re.match(rf"\AReverts {GITHUB_REPOSITORY}#[\d]+\Z", line) - ]: - return "", LABELS["pr-not-for-changelog"][0] - - category = "" - entry = "" - description_error = "" - - i = 0 - while i < len(lines): - if re.match(r"(?i)^[#>*_ ]*change\s*log\s*category", lines[i]): - i += 1 - if i >= len(lines): - break - # Can have one empty line between header and the category - # itself. Filter it out. - if not lines[i]: - i += 1 - if i >= len(lines): - break - category = re.sub(r"^[-*\s]*", "", lines[i]) - i += 1 - - # Should not have more than one category. Require empty line - # after the first found category. - if i >= len(lines): - break - if lines[i]: - second_category = re.sub(r"^[-*\s]*", "", lines[i]) - result_status = ( - "More than one changelog category specified: '" - + category - + "', '" - + second_category - + "'" - ) - return result_status, category - - elif re.match( - r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] - ): - i += 1 - # Can have one empty line between header and the entry itself. - # Filter it out. - if i < len(lines) and not lines[i]: - i += 1 - # All following lines until empty one are the changelog entry. - entry_lines = [] - while i < len(lines) and lines[i]: - entry_lines.append(lines[i]) - i += 1 - entry = " ".join(entry_lines) - # Don't accept changelog entries like '...'. - entry = re.sub(r"[#>*_.\- ]", "", entry) - # Don't accept changelog entries like 'Close #12345'. 
- entry = re.sub(r"^[\w\-\s]{0,10}#?\d{5,6}\.?$", "", entry) - else: - i += 1 - - if not category: - description_error = "Changelog category is empty" - # Filter out the PR categories that are not for changelog. - elif re.match( - r"(?i)doc|((non|in|not|un)[-\s]*significant)|(not[ ]*for[ ]*changelog)", - category, - ): - pass # to not check the rest of the conditions - elif category not in CATEGORY_TO_LABEL: - description_error, category = f"Category '{category}' is not valid", "" - elif not entry: - description_error = f"Changelog entry required for category '{category}'" - - return description_error, category - - -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True) @@ -213,7 +97,7 @@ if __name__ == "__main__": print("::notice ::Cannot run, no PR exists for the commit") sys.exit(1) - can_run, description, labels_state = should_run_checks_for_pr(pr_info) + can_run, description, labels_state = should_run_ci_for_pr(pr_info) if can_run and OK_SKIP_LABELS.intersection(pr_info.labels): print("::notice :: Early finish the check, running in a special PR") sys.exit(0) @@ -222,7 +106,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) - description_error, category = check_pr_description(pr_info) + description_error, category = check_pr_description(pr_info.body) pr_labels_to_add = [] pr_labels_to_remove = [] if ( @@ -253,12 +137,14 @@ if __name__ == "__main__": if FEATURE_LABEL in pr_info.labels: print(f"The '{FEATURE_LABEL}' in the labels, expect the 'Docs Check' status") - commit.create_status( - context=DOCS_NAME, - description=f"expect adding docs for {FEATURE_LABEL}", - state="pending", + post_commit_status( # do not pass pr_info here intentionally + commit, + "pending", + NotSet, + f"expect adding docs for {FEATURE_LABEL}", + DOCS_NAME, ) - else: + elif not description_error: set_mergeable_check(commit, "skipped") if description_error: @@ -267,7 +153,7 @@ if __name__ == "__main__": f"{description_error}" ) logging.info( - "PR body doesn't match the template: (start)\n%s\n(end)\n" "Reason: %s", + "PR body doesn't match the template: (start)\n%s\n(end)\nReason: %s", pr_info.body, description_error, ) @@ -275,23 +161,29 @@ if __name__ == "__main__": f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/" "blob/master/.github/PULL_REQUEST_TEMPLATE.md?plain=1" ) - commit.create_status( - context=NAME, - description=format_description(description_error), - state="failure", - target_url=url, + post_commit_status( + commit, + "failure", + url, + format_description(description_error), + CI_STATUS_NAME, + pr_info, ) sys.exit(1) - url = GITHUB_RUN_URL + ci_report_url = create_ci_report(pr_info, []) if not can_run: print("::notice ::Cannot run") - commit.create_status( - context=NAME, description=description, state=labels_state, target_url=url + post_commit_status( + commit, labels_state, ci_report_url, description, CI_STATUS_NAME, pr_info ) sys.exit(1) else: print("::notice ::Can run") - commit.create_status( - context=NAME, description=description, state="pending", target_url=url + post_commit_status( + commit, "pending", ci_report_url, description, CI_STATUS_NAME, pr_info ) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py index fbe9f33b49b..2af02d572c8 100644 --- a/tests/ci/s3_helper.py +++ b/tests/ci/s3_helper.py @@ -40,11 +40,11 @@ def _flatten_list(lst): class S3Helper: - def 
__init__(self, host=S3_URL, download_host=S3_DOWNLOAD): + def __init__(self): self.session = boto3.session.Session(region_name="us-east-1") - self.client = self.session.client("s3", endpoint_url=host) - self.host = host - self.download_host = download_host + self.client = self.session.client("s3", endpoint_url=S3_URL) + self.host = S3_URL + self.download_host = S3_DOWNLOAD def _upload_file_to_s3(self, bucket_name: str, file_path: str, s3_path: str) -> str: logging.debug( diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 1a6c4d14616..144dea54133 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -10,10 +10,14 @@ from github import Github from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import format_description, post_commit_status +from commit_status_helper import ( + RerunHelper, + format_description, + get_commit, + post_commit_status, +) from docker_pull_helper import get_image_with_version from env_helper import ( - GITHUB_REPOSITORY, GITHUB_RUN_URL, REPORTS_PATH, TEMP_PATH, @@ -21,7 +25,6 @@ from env_helper import ( from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -46,12 +49,6 @@ def get_run_command(download_url, workspace_path, image): ) -def get_commit(gh, commit_sha): - repo = gh.get_repo(GITHUB_REPOSITORY) - commit = repo.get_commit(commit_sha) - return commit - - def main(): logging.basicConfig(level=logging.INFO) @@ -68,8 +65,9 @@ def main(): pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -187,12 +185,10 @@ def main(): check_name, ) - post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) - + post_commit_status(commit, status, report_url, description, check_name, pr_info) print(f"::notice:: {check_name} Report url: {report_url}") ch_helper = ClickHouseHelper() - prepared_events = prepare_tests_results_for_clickhouse( pr_info, test_results, @@ -202,12 +198,8 @@ def main(): report_url, check_name, ) - ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - print(f"::notice Result: '{status}', '{description}', '{report_url}'") - post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) - if __name__ == "__main__": main() diff --git a/tests/ci/sqllogic_test.py b/tests/ci/sqllogic_test.py index 9b41ff4680f..942c9c60ee8 100755 --- a/tests/ci/sqllogic_test.py +++ b/tests/ci/sqllogic_test.py @@ -17,11 +17,15 @@ from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages from upload_result_helper import upload_results from docker_pull_helper import get_image_with_version -from commit_status_helper import override_status, post_commit_status +from commit_status_helper import ( + RerunHelper, + get_commit, + override_status, + post_commit_status, +) from report import TestResults, read_test_results from stopwatch import Stopwatch -from rerun_helper import RerunHelper 
from tee_popen import TeePopen @@ -103,8 +107,9 @@ if __name__ == "__main__": pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -203,7 +208,7 @@ if __name__ == "__main__": # Until it pass all tests, do not block CI, report "success" assert description is not None - post_commit_status(gh, pr_info.sha, check_name, description, "success", report_url) + post_commit_status(commit, "success", report_url, description, check_name, pr_info) if status != "success": if FORCE_TESTS_LABEL in pr_info.labels: diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 7596a81ebc9..ac280916a2f 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -16,13 +16,12 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status +from commit_status_helper import RerunHelper, get_commit, post_commit_status from docker_pull_helper import get_image_with_version from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, read_test_results -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -125,8 +124,9 @@ def run_stress_test(docker_image_name): pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -180,7 +180,7 @@ def run_stress_test(docker_image_name): ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(commit, state, report_url, description, check_name, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 89878990c2c..33a5cd21f39 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -15,7 +15,12 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status, update_mergeable_check +from commit_status_helper import ( + RerunHelper, + get_commit, + post_commit_status, + update_mergeable_check, +) from docker_pull_helper import get_image_with_version from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP from get_robot_token import get_best_robot_token @@ -23,7 +28,6 @@ from github_helper import GitHub from git_helper import git_runner from pr_info import PRInfo from report import TestResults, read_test_results -from rerun_helper import RerunHelper from s3_helper import S3Helper from ssh import SSHKey from stopwatch import Stopwatch @@ -149,10 +153,11 @@ def main(): checkout_head(pr_info) gh = GitHub(get_best_robot_token(), create_cache_dir=False) + commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, gh, pr_info, NAME) - rerun_helper = RerunHelper(gh, pr_info, NAME) + rerun_helper = RerunHelper(commit, NAME) if 
rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") # Finish with the same code as previous @@ -190,7 +195,7 @@ def main(): s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) + post_commit_status(commit, state, report_url, description, NAME, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index edc096908f4..5279ccde492 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -15,13 +15,17 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status, update_mergeable_check +from commit_status_helper import ( + RerunHelper, + get_commit, + post_commit_status, + update_mergeable_check, +) from docker_pull_helper import get_image_with_version from env_helper import TEMP_PATH, REPORTS_PATH from get_robot_token import get_best_robot_token from pr_info import PRInfo from report import TestResults, TestResult -from rerun_helper import RerunHelper from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -116,10 +120,11 @@ def main(): pr_info = PRInfo() gh = Github(get_best_robot_token(), per_page=100) + commit = get_commit(gh, pr_info.sha) atexit.register(update_mergeable_check, gh, pr_info, check_name) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(commit, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -165,7 +170,7 @@ def main(): check_name, ) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(commit, state, report_url, description, check_name, pr_info) prepared_events = prepare_tests_results_for_clickhouse( pr_info, diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index b988e240b0e..150af7aff4a 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -59,9 +59,10 @@ def upload_results( additional_files: List[str], check_name: str, ) -> str: - s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace( - " ", "_" - ).replace("(", "_").replace(")", "_").replace(",", "_") + normalized_check_name = check_name.lower() + for r in ((" ", "_"), ("(", "_"), (")", "_"), (",", "_"), ("/", "_")): + normalized_check_name = normalized_check_name.replace(*r) + s3_path_prefix = f"{pr_number}/{commit_sha}/{normalized_check_name}" additional_urls = process_logs( s3_client, additional_files, s3_path_prefix, test_results ) diff --git a/tests/ci/workflow_jobs_lambda/app.py b/tests/ci/workflow_jobs_lambda/app.py index 9436e01ad53..49d475d11dc 100644 --- a/tests/ci/workflow_jobs_lambda/app.py +++ b/tests/ci/workflow_jobs_lambda/app.py @@ -284,10 +284,12 @@ def handler(event: dict, _: Any) -> dict: wf_job["runner_group_name"] or "", # nullable repo["full_name"], ) + logging.info( + "Got the next event (private_repo=%s): %s", repo["private"], workflow_job + ) if repo["private"]: workflow_job.anonimyze() - logging.info("Got the next event: %s", workflow_job) send_event_workflow_job(workflow_job) return { diff --git a/tests/clickhouse-test 
b/tests/clickhouse-test index eb5faa1ffb5..acc8688cc4a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -11,6 +11,7 @@ import shutil import sys import os import os.path +import platform import signal import re import copy @@ -542,7 +543,10 @@ class SettingsRandomizer: 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "local_filesystem_read_method": lambda: random.choice( + # Allow to use uring only when running on Linux ["read", "pread", "mmap", "pread_threadpool", "io_uring"] + if platform.system().lower() == "linux" + else ["read", "pread", "mmap", "pread_threadpool"] ), "remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]), "local_filesystem_read_prefetch": lambda: random.randint(0, 1), @@ -579,10 +583,17 @@ class SettingsRandomizer: } @staticmethod - def get_random_settings(): + def get_random_settings(args): random_settings = [] + is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): - random_settings.append(f"{setting}={generator()}") + if ( + is_debug + and setting == "allow_prefetched_read_pool_for_remote_filesystem" + ): + random_settings.append(f"{setting}=0") + else: + random_settings.append(f"{setting}={generator()}") return random_settings @@ -817,7 +828,7 @@ class TestCase: ) if self.randomize_settings: - self.random_settings = SettingsRandomizer.get_random_settings() + self.random_settings = SettingsRandomizer.get_random_settings(args) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( @@ -2106,7 +2117,14 @@ def reportLogStats(args): 'Column ''{}'' already exists', 'No macro {} in config', 'Invalid origin H3 index: {}', 'Invalid session timeout: ''{}''', 'Tuple cannot be empty', 'Database name is empty', 'Table {} is not a Dictionary', 'Expected function, got: {}', 'Unknown identifier: ''{}''', - 'Failed to {} input ''{}''', '{}.{} is not a VIEW', 'Cannot convert NULL to {}', 'Dictionary {} doesn''t exist' + 'Failed to {} input ''{}''', '{}.{} is not a VIEW', 'Cannot convert NULL to {}', 'Dictionary {} doesn''t exist', + 'Write file: {}', 'Unable to parse JSONPath', 'Host is empty in S3 URI.', 'Expected end of line', + 'inflate failed: {}{}', 'Center is not valid', 'Column ''{}'' is ambiguous', 'Cannot parse object', 'Invalid date: {}', + 'There is no cache by name: {}', 'No part {} in table', '`{}` should be a String', 'There are duplicate id {}', + 'Invalid replica name: {}', 'Unexpected value {} in enum', 'Unknown BSON type: {}', 'Point is not valid', + 'Invalid qualified name: {}', 'INTO OUTFILE is not allowed', 'Arguments must not be NaN', 'Cell is not valid', + 'brotli decode error{}', 'Invalid H3 index: {}', 'Too large node state size', 'No additional keys found.', + 'Attempt to read after EOF.', 'Replication was stopped', '{} building file infos', 'Cannot parse uuid {}' ) AS known_short_messages SELECT count() AS c, message_format_string, substr(any(message), 1, 120) FROM system.text_log @@ -2245,7 +2263,7 @@ def main(args): "\nFound hung queries in processlist:", args, "red", attrs=["bold"] ) ) - print(json.dumps(processlist, indent=4)) + print(processlist) print(get_transactions_list(args)) print_stacktraces() diff --git a/tests/config/config.d/merge_tree_old_dirs_cleanup.xml b/tests/config/config.d/merge_tree_old_dirs_cleanup.xml index 41932cb6d61..2b8ea63b63d 100644 --- a/tests/config/config.d/merge_tree_old_dirs_cleanup.xml +++ b/tests/config/config.d/merge_tree_old_dirs_cleanup.xml @@ -3,6 +3,6 @@ 1 - 10 + 5
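Note: the tests/clickhouse-test hunks above make the settings randomizer aware of the host platform (io_uring is only offered on Linux) and of debug builds (allow_prefetched_read_pool_for_remote_filesystem is pinned to 0 there instead of being randomized). The following is a minimal, standalone Python sketch of that gating idea, not the actual clickhouse-test code: choose_local_read_method and the is_debug flag are simplified stand-ins introduced only for illustration.

# Sketch of the platform/build-aware randomization added to tests/clickhouse-test.
# Names and structure are simplified; only the gating logic mirrors the diff.
import platform
import random


def choose_local_read_method() -> str:
    # io_uring is Linux-only, so it becomes a candidate only on Linux hosts.
    methods = ["read", "pread", "mmap", "pread_threadpool"]
    if platform.system().lower() == "linux":
        methods.append("io_uring")
    return random.choice(methods)


def get_random_settings(is_debug: bool) -> list:
    settings = {
        "local_filesystem_read_method": choose_local_read_method(),
        "allow_prefetched_read_pool_for_remote_filesystem": random.randint(0, 1),
    }
    if is_debug:
        # Debug builds force the prefetched read pool off instead of randomizing it.
        settings["allow_prefetched_read_pool_for_remote_filesystem"] = 0
    return [f"{name}={value}" for name, value in settings.items()]


if __name__ == "__main__":
    # Example: a debug run never enables the prefetched read pool.
    print(get_random_settings(is_debug=True))
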
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index bc9269e6ec1..dee03307177 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -55,52 +55,58 @@ cache s3_disk s3_cache/ - 2147483648 + 128Mi 1 0 + 100 cache s3_disk_2 s3_cache_2/ - 2Gi + 128Mi 0 100Mi + 100 cache s3_disk_3 s3_disk_3_cache/ - 22548578304 + 128Mi 22548578304 1 1 0 + 100 cache s3_disk_4 s3_cache_4/ - 22548578304 + 128Mi 1 1 0 + 100 cache s3_disk_5 s3_cache_5/ - 22548578304 + 128Mi 0 + 100 cache s3_disk_6 s3_cache_6/ - 22548578304 + 128Mi 0 1 100 + 100 cache @@ -108,27 +114,29 @@ s3_cache_small/ 1000 1 + 100 cache s3_disk_6 s3_cache_small_segment_size/ - 22548578304 + 128Mi 10Ki 0 1 + 100 - local + local_blob_storage local_disk/ - local + local_blob_storage local_disk_2/ - local + local_blob_storage local_disk_3/ @@ -139,6 +147,7 @@ 22548578304 1 0 + 100 cache @@ -146,6 +155,7 @@ local_cache_2/ 22548578304 0 + 100 cache @@ -155,6 +165,7 @@ 1 1 0 + 100 @@ -163,6 +174,7 @@ s3_cache_multi/ 22548578304 0 + 100 cache @@ -170,6 +182,7 @@ s3_cache_multi_2/ 22548578304 0 + 100 diff --git a/tests/config/install.sh b/tests/config/install.sh index 77e8a8460ad..efa5a9c086e 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -79,6 +79,10 @@ ln -sf $SRC_PATH/users.d/marks.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/insert_keeper_retries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/ +if [[ -n "$USE_NEW_ANALYZER" ]] && [[ "$USE_NEW_ANALYZER" -eq 1 ]]; then + ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/ +fi + # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), # but socket is owned by HTTPSessionPool, so it's not closed. 
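Note: the tests/config/install.sh hunk above links users.d/analyzer.xml into the server config only when USE_NEW_ANALYZER is set to 1, so the same install script serves both analyzer and non-analyzer test runs. Below is a hedged, standalone Python sketch of that environment gate, for illustration only; the real logic is the two-line bash check above, and the src_path/dest_server_path arguments are hypothetical placeholders for SRC_PATH and DEST_SERVER_PATH.

# Sketch of the USE_NEW_ANALYZER gate from tests/config/install.sh, expressed in
# Python. Paths are placeholders; this is not part of the actual CI scripts.
import os


def should_install_analyzer_config() -> bool:
    # The variable must be set and equal to 1, as in the bash check
    # `[[ -n "$USE_NEW_ANALYZER" ]] && [[ "$USE_NEW_ANALYZER" -eq 1 ]]`.
    return os.environ.get("USE_NEW_ANALYZER", "") == "1"


def install_analyzer_config(src_path: str, dest_server_path: str) -> None:
    if not should_install_analyzer_config():
        return
    source = os.path.join(src_path, "users.d", "analyzer.xml")
    target = os.path.join(dest_server_path, "users.d", "analyzer.xml")
    # `ln -sf` semantics: replace an existing link if one is already there.
    if os.path.lexists(target):
        os.remove(target)
    os.symlink(source, target)
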
diff --git a/tests/config/users.d/analyzer.xml b/tests/config/users.d/analyzer.xml new file mode 100644 index 00000000000..aa374364ef0 --- /dev/null +++ b/tests/config/users.d/analyzer.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/helpers/external_sources.py b/tests/integration/helpers/external_sources.py index fd086fc4526..afb91083d57 100644 --- a/tests/integration/helpers/external_sources.py +++ b/tests/integration/helpers/external_sources.py @@ -161,6 +161,29 @@ class SourceMySQL(ExternalSource): class SourceMongo(ExternalSource): + def __init__( + self, + name, + internal_hostname, + internal_port, + docker_hostname, + docker_port, + user, + password, + secure=False, + ): + ExternalSource.__init__( + self, + name, + internal_hostname, + internal_port, + docker_hostname, + docker_port, + user, + password, + ) + self.secure = secure + def get_source_str(self, table_name): return """ @@ -170,6 +193,7 @@ class SourceMongo(ExternalSource): {password} test {tbl} + {options} """.format( host=self.docker_hostname, @@ -177,6 +201,7 @@ class SourceMongo(ExternalSource): user=self.user, password=self.password, tbl=table_name, + options="ssl=true" if self.secure else "", ) def prepare(self, structure, table_name, cluster): @@ -186,6 +211,8 @@ class SourceMongo(ExternalSource): user=self.user, password=self.password, ) + if self.secure: + connection_str += "/?tls=true&tlsAllowInvalidCertificates=true" self.connection = pymongo.MongoClient(connection_str) self.converters = {} for field in structure.get_all_fields(): @@ -228,7 +255,7 @@ class SourceMongoURI(SourceMongo): def get_source_str(self, table_name): return """ - mongodb://{user}:{password}@{host}:{port}/test + mongodb://{user}:{password}@{host}:{port}/test{options} {tbl} """.format( @@ -237,6 +264,7 @@ class SourceMongoURI(SourceMongo): user=self.user, password=self.password, tbl=table_name, + options="?ssl=true" if self.secure else "", ) diff --git a/tests/integration/helpers/postgres_utility.py b/tests/integration/helpers/postgres_utility.py index 838c22c8a7c..dfae37af434 100644 --- a/tests/integration/helpers/postgres_utility.py +++ b/tests/integration/helpers/postgres_utility.py @@ -204,7 +204,7 @@ class PostgresManager: assert materialized_database in self.instance.query("SHOW DATABASES") def drop_materialized_db(self, materialized_database="test_database"): - self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} NO DELAY") + self.instance.query(f"DROP DATABASE IF EXISTS {materialized_database} SYNC") if materialized_database in self.created_materialized_postgres_db_list: self.created_materialized_postgres_db_list.remove(materialized_database) assert materialized_database not in self.instance.query("SHOW DATABASES") diff --git a/tests/integration/test_alternative_keeper_config/test.py b/tests/integration/test_alternative_keeper_config/test.py index 2d59d2ee8b9..f1016ee1ae3 100644 --- a/tests/integration/test_alternative_keeper_config/test.py +++ b/tests/integration/test_alternative_keeper_config/test.py @@ -44,7 +44,7 @@ def started_cluster(): def test_create_insert(started_cluster): - node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'test_cluster' NO DELAY") + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'test_cluster' SYNC") node1.query( """ CREATE TABLE tbl ON CLUSTER 'test_cluster' ( diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 41db2580c7d..53f1599a0d6 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ 
b/tests/integration/test_backup_restore_new/test.py @@ -1482,23 +1482,23 @@ def test_tables_dependency(): # Drop everything in reversive order. def drop(): - instance.query(f"DROP TABLE {t15} NO DELAY") - instance.query(f"DROP TABLE {t14} NO DELAY") - instance.query(f"DROP TABLE {t13} NO DELAY") - instance.query(f"DROP TABLE {t12} NO DELAY") - instance.query(f"DROP TABLE {t11} NO DELAY") - instance.query(f"DROP TABLE {t10} NO DELAY") - instance.query(f"DROP TABLE {t9} NO DELAY") + instance.query(f"DROP TABLE {t15} SYNC") + instance.query(f"DROP TABLE {t14} SYNC") + instance.query(f"DROP TABLE {t13} SYNC") + instance.query(f"DROP TABLE {t12} SYNC") + instance.query(f"DROP TABLE {t11} SYNC") + instance.query(f"DROP TABLE {t10} SYNC") + instance.query(f"DROP TABLE {t9} SYNC") instance.query(f"DROP DICTIONARY {t8}") - instance.query(f"DROP TABLE {t7} NO DELAY") - instance.query(f"DROP TABLE {t6} NO DELAY") - instance.query(f"DROP TABLE {t5} NO DELAY") + instance.query(f"DROP TABLE {t7} SYNC") + instance.query(f"DROP TABLE {t6} SYNC") + instance.query(f"DROP TABLE {t5} SYNC") instance.query(f"DROP DICTIONARY {t4}") - instance.query(f"DROP TABLE {t3} NO DELAY") - instance.query(f"DROP TABLE {t2} NO DELAY") - instance.query(f"DROP TABLE {t1} NO DELAY") - instance.query("DROP DATABASE test NO DELAY") - instance.query("DROP DATABASE test2 NO DELAY") + instance.query(f"DROP TABLE {t3} SYNC") + instance.query(f"DROP TABLE {t2} SYNC") + instance.query(f"DROP TABLE {t1} SYNC") + instance.query("DROP DATABASE test SYNC") + instance.query("DROP DATABASE test2 SYNC") drop() diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 9ed39627d82..5542eac856d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -65,9 +65,9 @@ def drop_after_test(): try: yield finally: - node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster3' NO DELAY") - node1.query("DROP TABLE IF EXISTS tbl2 ON CLUSTER 'cluster3' NO DELAY") - node1.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster3' NO DELAY") + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster3' SYNC") + node1.query("DROP TABLE IF EXISTS tbl2 ON CLUSTER 'cluster3' SYNC") + node1.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster3' SYNC") node1.query("DROP USER IF EXISTS u1, u2 ON CLUSTER 'cluster3'") @@ -107,7 +107,7 @@ def test_replicated_table(): ) # Drop table on both nodes. - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") # Restore from backup on node2. node2.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") @@ -138,7 +138,7 @@ def test_empty_replicated_table(): ) # Drop table on both nodes. - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") # Restore from backup on node2. node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") @@ -172,7 +172,7 @@ def test_replicated_database(): ) # Drop table on both nodes. - node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") # Restore from backup on node2. 
node1.query(f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name}") @@ -201,7 +201,7 @@ def test_different_tables_on_nodes(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node2.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") @@ -224,7 +224,7 @@ def test_backup_restore_on_single_replica(): backup_name = new_backup_name() node1.query(f"BACKUP DATABASE mydb TO {backup_name}") - node1.query("DROP DATABASE mydb NO DELAY") + node1.query("DROP DATABASE mydb SYNC") # Cannot restore table because it already contains data on other replicas. expected_error = "already contains some data" @@ -243,7 +243,7 @@ def test_backup_restore_on_single_replica(): ) # Can restore table with allow_non_empty_tables=true. - node1.query("DROP DATABASE mydb NO DELAY") + node1.query("DROP DATABASE mydb SYNC") node1.query( f"RESTORE DATABASE mydb FROM {backup_name} SETTINGS allow_non_empty_tables=true" ) @@ -266,7 +266,7 @@ def test_table_with_parts_in_queue_considered_non_empty(): backup_name = new_backup_name() node1.query(f"BACKUP DATABASE mydb TO {backup_name}") - node1.query("DROP DATABASE mydb NO DELAY") + node1.query("DROP DATABASE mydb SYNC") # Cannot restore table because it already contains data on other replicas. expected_error = "already contains some data" @@ -295,7 +295,7 @@ def test_replicated_table_with_not_synced_insert(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -325,7 +325,7 @@ def test_replicated_table_with_not_synced_merge(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -348,7 +348,7 @@ def test_replicated_table_restored_into_bigger_cluster(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster3' FROM {backup_name}") node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster3' tbl") @@ -372,7 +372,7 @@ def test_replicated_table_restored_into_smaller_cluster(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster1' FROM {backup_name}") assert node1.query("SELECT * FROM tbl ORDER BY x") == TSV([111, 222]) @@ -410,7 +410,7 @@ def test_replicated_database_async(): TSV([["BACKUP_CREATED", ""]]), ) - node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") [id, status] = node1.query( f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} ASYNC" @@ -454,7 +454,7 @@ def 
test_keeper_value_max_size(): settings={"backup_restore_keeper_value_max_size": 50}, ) - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -541,7 +541,7 @@ def test_async_backups_to_same_destination(interface, on_cluster): assert num_failed_backups == len(ids) - 1 # Check that the succeeded backup is all right. - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl FROM {backup_name}") assert node1.query("SELECT * FROM tbl") == "1\n" @@ -568,7 +568,7 @@ def test_required_privileges(): node1.query("GRANT BACKUP ON tbl TO u1") node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}", user="u1") - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl2" assert expected_error in node1.query_and_get_error( @@ -582,7 +582,7 @@ def test_required_privileges(): assert node2.query("SELECT * FROM tbl2") == "100\n" - node1.query(f"DROP TABLE tbl2 ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl2 ON CLUSTER 'cluster' SYNC") node1.query("REVOKE ALL FROM u1") expected_error = "necessary to have grant INSERT, CREATE TABLE ON default.tbl" @@ -703,7 +703,7 @@ def test_projection(): backup_name = new_backup_name() node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") assert ( node1.query( @@ -755,7 +755,7 @@ def test_replicated_table_with_not_synced_def(): backup_name = new_backup_name() node2.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") # But synced after RESTORE anyway node1.query( @@ -768,7 +768,7 @@ def test_replicated_table_with_not_synced_def(): "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" ) == TSV([["x", "String"], ["y", "String"]]) - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node2.query( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2" @@ -795,7 +795,7 @@ def test_table_in_replicated_database_with_not_synced_def(): backup_name = new_backup_name() node2.query(f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name}") - node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") # But synced after RESTORE anyway node1.query( @@ -808,7 +808,7 @@ def test_table_in_replicated_database_with_not_synced_def(): "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" ) == TSV([["x", "String"], ["y", "String"]]) - node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") node2.query( f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2" @@ -870,7 +870,7 @@ def test_mutation(): assert has_mutation_in_backup("0000000002", backup_name, "default", "tbl") assert not has_mutation_in_backup("0000000003", backup_name, 
"default", "tbl") - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") @@ -1006,7 +1006,7 @@ def test_stop_other_host_during_backup(kill): node2.start_clickhouse() if status == "BACKUP_CREATED": - node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") assert node1.query("SELECT * FROM tbl ORDER BY x") == TSV([3, 5]) elif status == "BACKUP_FAILED": diff --git a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py index a28a1fa142b..aea82c6b559 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_concurrency.py @@ -62,8 +62,8 @@ def drop_after_test(): try: yield finally: - node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") - node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY") + node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' SYNC") + node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' SYNC") backup_id_counter = 0 @@ -95,7 +95,7 @@ def test_replicated_table(): backup_name = new_backup_name() node0.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node0.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") node0.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -131,7 +131,7 @@ def test_concurrent_backups_on_same_node(): ) == TSV([["BACKUP_CREATED", ""]] * num_concurrent_backups) for backup_name in backup_names: - node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") node0.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") node0.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") for i in range(num_nodes): @@ -166,7 +166,7 @@ def test_concurrent_backups_on_different_nodes(): ) == TSV([["BACKUP_CREATED", ""]]) for i in range(num_concurrent_backups): - nodes[i].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + nodes[i].query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") nodes[i].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_names[i]}") nodes[i].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") for j in range(num_nodes): @@ -214,7 +214,7 @@ def test_create_or_drop_tables_during_backup(db_engine, table_engine): while time.time() < end_time: table_name = f"mydb.tbl{randint(1, num_nodes)}" node = nodes[randint(0, num_nodes - 1)] - node.query(f"DROP TABLE IF EXISTS {table_name} NO DELAY") + node.query(f"DROP TABLE IF EXISTS {table_name} SYNC") def rename_tables(): while time.time() < end_time: @@ -229,7 +229,7 @@ def test_create_or_drop_tables_during_backup(db_engine, table_engine): while time.time() < end_time: table_name = f"mydb.tbl{randint(1, num_nodes)}" node = nodes[randint(0, num_nodes - 1)] - node.query(f"TRUNCATE TABLE IF EXISTS {table_name} NO DELAY") + node.query(f"TRUNCATE TABLE IF EXISTS {table_name} SYNC") def make_backups(): ids = [] @@ -320,8 +320,8 @@ def test_kill_mutation_during_backup(): TSV([["BACKUP_CREATED", ""]]), ) - node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node0.query(f"DROP TABLE tbl 
ON CLUSTER 'cluster' SYNC") node0.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") if n != repeat_count - 1: - node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + node0.query(f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC") diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 0d8fad96438..e5cd9ade68b 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -84,7 +84,7 @@ def drop_after_test(): yield finally: node0.query( - "DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY", + "DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' SYNC", settings={ "distributed_ddl_task_timeout": 360, }, @@ -154,7 +154,7 @@ def test_concurrent_backups_on_same_node(): # This restore part is added to confirm creating an internal backup & restore work # even when a concurrent backup is stopped nodes[0].query( - f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC", settings={ "distributed_ddl_task_timeout": 360, }, @@ -206,7 +206,7 @@ def test_concurrent_restores_on_same_node(): nodes[0].query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") nodes[0].query( - f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC", settings={ "distributed_ddl_task_timeout": 360, }, @@ -251,7 +251,7 @@ def test_concurrent_restores_on_different_node(): nodes[0].query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") nodes[0].query( - f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + f"DROP TABLE tbl ON CLUSTER 'cluster' SYNC", settings={ "distributed_ddl_task_timeout": 360, }, diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 0696d51136a..30c7bad7b83 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -37,7 +37,7 @@ def new_backup_name(): def check_backup_and_restore(storage_policy, backup_destination, size=1000): node.query( f""" - DROP TABLE IF EXISTS data NO DELAY; + DROP TABLE IF EXISTS data SYNC; CREATE TABLE data (key Int, value String, array Array(String)) Engine=MergeTree() ORDER BY tuple() SETTINGS storage_policy='{storage_policy}'; INSERT INTO data SELECT * FROM generateRandom('key Int, value String, array Array(String)') LIMIT {size}; BACKUP TABLE data TO {backup_destination}; @@ -47,8 +47,8 @@ def check_backup_and_restore(storage_policy, backup_destination, size=1000): (SELECT count(), sum(sipHash64(*)) FROM data_restored), 'Data does not matched after BACKUP/RESTORE' ); - DROP TABLE data NO DELAY; - DROP TABLE data_restored NO DELAY; + DROP TABLE data SYNC; + DROP TABLE data_restored SYNC; """ ) diff --git a/tests/integration/test_concurrent_backups_s3/test.py b/tests/integration/test_concurrent_backups_s3/test.py index 73692e52cce..b29058865c0 100644 --- a/tests/integration/test_concurrent_backups_s3/test.py +++ b/tests/integration/test_concurrent_backups_s3/test.py @@ -25,7 +25,7 @@ def start_cluster(): def test_concurrent_backups(start_cluster): - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") columns = [f"column_{i} UInt64" for i in range(1000)] columns_str = ", ".join(columns) node.query( diff --git 
a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/ssl_verification.xml b/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/ssl_verification.xml new file mode 100644 index 00000000000..3efe98e7045 --- /dev/null +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/ssl_verification.xml @@ -0,0 +1,8 @@ + + + + + none + + + diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py index 55639877ba0..973dbfc0429 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py @@ -17,14 +17,71 @@ ranged_tester = None test_name = "mongo" -def setup_module(module): - global cluster - global node - global simple_tester - global complex_tester - global ranged_tester +@pytest.fixture(scope="module") +def secure_connection(request): + return request.param - cluster = ClickHouseCluster(__file__) + +@pytest.fixture(scope="module") +def cluster(secure_connection): + return ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def source(secure_connection, cluster): + return SourceMongo( + "MongoDB", + "localhost", + cluster.mongo_port, + cluster.mongo_host, + "27017", + "root", + "clickhouse", + secure=secure_connection, + ) + + +@pytest.fixture(scope="module") +def simple_tester(source): + tester = SimpleLayoutTester(test_name) + tester.cleanup() + tester.create_dictionaries(source) + return tester + + +@pytest.fixture(scope="module") +def complex_tester(source): + tester = ComplexLayoutTester(test_name) + tester.create_dictionaries(source) + return tester + + +@pytest.fixture(scope="module") +def ranged_tester(source): + tester = RangedLayoutTester(test_name) + tester.create_dictionaries(source) + return tester + + +@pytest.fixture(scope="module") +def main_config(secure_connection): + main_config = [] + if secure_connection: + main_config.append(os.path.join("configs", "disable_ssl_verification.xml")) + else: + main_config.append(os.path.join("configs", "ssl_verification.xml")) + return main_config + + +@pytest.fixture(scope="module") +def started_cluster( + secure_connection, + cluster, + main_config, + simple_tester, + ranged_tester, + complex_tester, +): SOURCE = SourceMongo( "MongoDB", "localhost", @@ -33,35 +90,18 @@ def setup_module(module): "27017", "root", "clickhouse", + secure=secure_connection, ) - - simple_tester = SimpleLayoutTester(test_name) - simple_tester.cleanup() - simple_tester.create_dictionaries(SOURCE) - - complex_tester = ComplexLayoutTester(test_name) - complex_tester.create_dictionaries(SOURCE) - - ranged_tester = RangedLayoutTester(test_name) - ranged_tester.create_dictionaries(SOURCE) - # Since that all .xml configs were created - - main_configs = [] - main_configs.append(os.path.join("configs", "disable_ssl_verification.xml")) - dictionaries = simple_tester.list_dictionaries() node = cluster.add_instance( - "node", main_configs=main_configs, dictionaries=dictionaries, with_mongo=True + "node", + main_configs=main_config, + dictionaries=dictionaries, + with_mongo=True, + with_mongo_secure=secure_connection, ) - -def teardown_module(module): - simple_tester.cleanup() - - -@pytest.fixture(scope="module") -def started_cluster(): try: cluster.start() @@ -75,16 +115,25 @@ def started_cluster(): cluster.shutdown() +@pytest.mark.parametrize("secure_connection", [False], 
indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple(started_cluster, layout_name): - simple_tester.execute(layout_name, node) +def test_simple(secure_connection, started_cluster, layout_name, simple_tester): + simple_tester.execute(layout_name, started_cluster.instances["node"]) +@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) -def test_complex(started_cluster, layout_name): - complex_tester.execute(layout_name, node) +def test_complex(secure_connection, started_cluster, layout_name, complex_tester): + complex_tester.execute(layout_name, started_cluster.instances["node"]) +@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED)) -def test_ranged(started_cluster, layout_name): - ranged_tester.execute(layout_name, node) +def test_ranged(secure_connection, started_cluster, layout_name, ranged_tester): + ranged_tester.execute(layout_name, started_cluster.instances["node"]) + + +@pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) +@pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) +def test_simple_ssl(secure_connection, started_cluster, layout_name, simple_tester): + simple_tester.execute(layout_name, started_cluster.instances["node"]) diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py index 84c547b7a6b..22541432259 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py @@ -8,25 +8,22 @@ from helpers.cluster import ClickHouseCluster from helpers.dictionary import Field, Row, Dictionary, DictionaryStructure, Layout from helpers.external_sources import SourceMongoURI -SOURCE = None -cluster = None -node = None -simple_tester = None -complex_tester = None -ranged_tester = None test_name = "mongo_uri" -def setup_module(module): - global cluster - global node - global simple_tester - global complex_tester - global ranged_tester +@pytest.fixture(scope="module") +def secure_connection(request): + return request.param - cluster = ClickHouseCluster(__file__) - SOURCE = SourceMongoURI( +@pytest.fixture(scope="module") +def cluster(secure_connection): + return ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def source(secure_connection, cluster): + return SourceMongoURI( "MongoDB", "localhost", cluster.mongo_port, @@ -34,52 +31,55 @@ def setup_module(module): "27017", "root", "clickhouse", + secure=secure_connection, ) - simple_tester = SimpleLayoutTester(test_name) - simple_tester.cleanup() - simple_tester.create_dictionaries(SOURCE) - complex_tester = ComplexLayoutTester(test_name) - complex_tester.create_dictionaries(SOURCE) +@pytest.fixture(scope="module") +def simple_tester(source): + tester = SimpleLayoutTester(test_name) + tester.cleanup() + tester.create_dictionaries(source) + return tester - ranged_tester = RangedLayoutTester(test_name) - ranged_tester.create_dictionaries(SOURCE) - # Since that all .xml configs were created - main_configs = [] - main_configs.append(os.path.join("configs", "disable_ssl_verification.xml")) +@pytest.fixture(scope="module") +def main_config(secure_connection): + main_config = [] + if 
secure_connection: + main_config.append(os.path.join("configs", "disable_ssl_verification.xml")) + else: + main_config.append(os.path.join("configs", "ssl_verification.xml")) + return main_config + +@pytest.fixture(scope="module") +def started_cluster(secure_connection, cluster, main_config, simple_tester): dictionaries = simple_tester.list_dictionaries() node = cluster.add_instance( "uri_node", - main_configs=main_configs, + main_configs=main_config, dictionaries=dictionaries, with_mongo=True, + with_mongo_secure=secure_connection, ) - - -def teardown_module(module): - simple_tester.cleanup() - - -@pytest.fixture(scope="module") -def started_cluster(): try: cluster.start() - simple_tester.prepare(cluster) - complex_tester.prepare(cluster) - ranged_tester.prepare(cluster) - yield cluster - finally: cluster.shutdown() # See comment in SourceMongoURI +@pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", ["flat"]) -def test_simple(started_cluster, layout_name): - simple_tester.execute(layout_name, node) +def test_simple(secure_connection, started_cluster, simple_tester, layout_name): + simple_tester.execute(layout_name, started_cluster.instances["uri_node"]) + + +@pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) +@pytest.mark.parametrize("layout_name", ["flat"]) +def test_simple_ssl(secure_connection, started_cluster, simple_tester, layout_name): + simple_tester.execute(layout_name, started_cluster.instances["uri_node"]) diff --git a/tests/integration/test_disk_configuration/test.py b/tests/integration/test_disk_configuration/test.py index 6ebe994dc68..3fe8286fa43 100644 --- a/tests/integration/test_disk_configuration/test.py +++ b/tests/integration/test_disk_configuration/test.py @@ -390,7 +390,7 @@ def test_merge_tree_setting_override(start_cluster): node.query( f""" - DROP TABLE IF EXISTS {TABLE_NAME} NO DELAY; + DROP TABLE IF EXISTS {TABLE_NAME} SYNC; CREATE TABLE {TABLE_NAME} (a Int32) ENGINE = MergeTree() ORDER BY tuple() @@ -412,7 +412,7 @@ def test_merge_tree_setting_override(start_cluster): node.query( f""" - DROP TABLE IF EXISTS {TABLE_NAME} NO DELAY; + DROP TABLE IF EXISTS {TABLE_NAME} SYNC; CREATE TABLE {TABLE_NAME} (a Int32) ENGINE = MergeTree() ORDER BY tuple() diff --git a/tests/integration/test_encrypted_disk/test.py b/tests/integration/test_encrypted_disk/test.py index 681df89dd0f..8187f2ff6a8 100644 --- a/tests/integration/test_encrypted_disk/test.py +++ b/tests/integration/test_encrypted_disk/test.py @@ -30,7 +30,7 @@ def cleanup_after_test(): try: yield finally: - node.query("DROP TABLE IF EXISTS encrypted_test NO DELAY") + node.query("DROP TABLE IF EXISTS encrypted_test SYNC") @pytest.mark.parametrize( @@ -294,4 +294,4 @@ def test_restart(): assert node.query(select_query) == "(0,'data'),(1,'data')" - node.query("DROP TABLE encrypted_test NO DELAY;") + node.query("DROP TABLE encrypted_test SYNC;") diff --git a/tests/integration/test_encrypted_disk_replication/test.py b/tests/integration/test_encrypted_disk_replication/test.py index f68c534ed43..c2aa710ba91 100644 --- a/tests/integration/test_encrypted_disk_replication/test.py +++ b/tests/integration/test_encrypted_disk_replication/test.py @@ -44,7 +44,7 @@ def cleanup_after_test(): try: yield finally: - node1.query("DROP TABLE IF EXISTS encrypted_test ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE IF EXISTS encrypted_test ON CLUSTER 'cluster' SYNC") def create_table( @@ -52,9 +52,7 @@ def create_table( 
): engine = "ReplicatedMergeTree('/clickhouse/tables/encrypted_test/', '{replica}')" - settings = f"storage_policy='{storage_policy}'" - if zero_copy_replication: - settings += ", allow_remote_fs_zero_copy_replication=true" + settings = f"storage_policy='{storage_policy}', allow_remote_fs_zero_copy_replication={int(zero_copy_replication)}" node1.query( f""" diff --git a/tests/integration/test_failed_async_inserts/test.py b/tests/integration/test_failed_async_inserts/test.py index 6d66ac97006..ecb506c36bc 100644 --- a/tests/integration/test_failed_async_inserts/test.py +++ b/tests/integration/test_failed_async_inserts/test.py @@ -51,4 +51,4 @@ def test_failed_async_inserts(started_cluster): assert node.query(select_query) == "4\n" - node.query("DROP TABLE IF EXISTS async_insert_30_10_2022 NO DELAY") + node.query("DROP TABLE IF EXISTS async_insert_30_10_2022 SYNC") diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 4ad046fe5d2..ee5d4b5df93 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -20,6 +20,9 @@ def start_cluster(): instance.query( "CREATE TABLE test.table(x UInt32, y UInt32) ENGINE = MergeTree ORDER BY tuple()" ) + instance.query( + "CREATE TABLE test.table2(x UInt32, y UInt32) ENGINE = MergeTree ORDER BY tuple()" + ) instance.query("INSERT INTO test.table VALUES (1,5), (2,10)") yield cluster @@ -585,3 +588,134 @@ def test_grant_with_replace_option(): assert instance.query("SHOW GRANTS FOR B") == TSV( ["GRANT INSERT ON test.table TO B"] ) + + +def test_grant_current_grants(): + instance.query("CREATE USER A") + instance.query( + "GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION" + ) + assert instance.query("SHOW GRANTS FOR A") == TSV( + ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO A WITH GRANT OPTION"] + ) + + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT, CREATE TABLE, CREATE VIEW ON test.* TO B"] + ) + + instance.query("CREATE USER C") + instance.query("GRANT CURRENT GRANTS(CREATE ON test.*) TO C", user="A") + assert instance.query("SHOW GRANTS FOR C") == TSV( + ["GRANT CREATE TABLE, CREATE VIEW ON test.* TO C"] + ) + + instance.query("DROP USER IF EXISTS C") + instance.query("CREATE USER C") + instance.query("GRANT CURRENT GRANTS(NONE ON *.*) TO C", user="A") + assert instance.query("SHOW GRANTS FOR C") == TSV([]) + + +def test_grant_current_grants_with_partial_revoke(): + instance.query("CREATE USER A") + instance.query("GRANT CREATE TABLE ON *.* TO A") + instance.query("REVOKE CREATE TABLE ON test.* FROM A") + instance.query("GRANT CREATE TABLE ON test.table TO A WITH GRANT OPTION") + instance.query("GRANT SELECT ON *.* TO A WITH GRANT OPTION") + instance.query("REVOKE SELECT ON test.* FROM A") + instance.query("GRANT SELECT ON test.table TO A WITH GRANT OPTION") + instance.query("GRANT SELECT ON test.table2 TO A") + + assert instance.query("SHOW GRANTS FOR A") == TSV( + [ + "GRANT CREATE TABLE ON *.* TO A", + "GRANT SELECT ON *.* TO A WITH GRANT OPTION", + "REVOKE SELECT, CREATE TABLE ON test.* FROM A", + "GRANT SELECT, CREATE TABLE ON test.table TO A WITH GRANT OPTION", + "GRANT SELECT ON test.table2 TO A", + ] + ) + + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO 
B", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT, CREATE TABLE ON test.table TO B", + ] + ) + + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS ON *.* TO B WITH GRANT OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B WITH GRANT OPTION", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT, CREATE TABLE ON test.table TO B WITH GRANT OPTION", + ] + ) + + instance.query("DROP USER IF EXISTS C") + instance.query("CREATE USER C") + instance.query("GRANT SELECT ON test.* TO B") + instance.query("GRANT CURRENT GRANTS ON *.* TO C", user="B") + assert instance.query("SHOW GRANTS FOR C") == TSV( + [ + "GRANT SELECT ON *.* TO C", + "GRANT CREATE TABLE ON test.table TO C", + ] + ) + + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT CURRENT GRANTS ON test.* TO B WITH GRANT OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT, CREATE TABLE ON test.table TO B WITH GRANT OPTION", + ] + ) + + +def test_current_grants_override(): + instance.query("CREATE USER A") + instance.query("GRANT SELECT ON *.* TO A WITH GRANT OPTION") + instance.query("REVOKE SELECT ON test.* FROM A") + assert instance.query("SHOW GRANTS FOR A") == TSV( + [ + "GRANT SELECT ON *.* TO A WITH GRANT OPTION", + "REVOKE SELECT ON test.* FROM A", + ] + ) + + instance.query("CREATE USER B") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT ON test.table TO B"] + ) + + instance.query("GRANT CURRENT GRANTS ON *.* TO B", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B", + "REVOKE SELECT ON test.* FROM B", + "GRANT SELECT ON test.table TO B", + ] + ) + + instance.query("DROP USER IF EXISTS B") + instance.query("CREATE USER B") + instance.query("GRANT SELECT ON test.table TO B") + assert instance.query("SHOW GRANTS FOR B") == TSV( + ["GRANT SELECT ON test.table TO B"] + ) + + instance.query("GRANT CURRENT GRANTS ON *.* TO B WITH REPLACE OPTION", user="A") + assert instance.query("SHOW GRANTS FOR B") == TSV( + [ + "GRANT SELECT ON *.* TO B", + "REVOKE SELECT ON test.* FROM B", + ] + ) diff --git a/tests/integration/test_hedged_requests/test.py b/tests/integration/test_hedged_requests/test.py index 88371f6908d..2ca37fbb7ee 100644 --- a/tests/integration/test_hedged_requests/test.py +++ b/tests/integration/test_hedged_requests/test.py @@ -128,12 +128,12 @@ def check_changing_replica_events(expected_count): assert int(result) >= expected_count -def check_if_query_sending_was_suspended(minimum_count): +def check_if_query_sending_was_suspended(): result = NODES["node"].query( "SELECT value FROM system.events WHERE event='SuspendSendingQueryToShard'" ) - assert int(result) >= minimum_count + assert int(result) >= 1 def check_if_query_sending_was_not_suspended(): @@ -381,7 +381,7 @@ def test_async_connect(started_cluster): "SELECT hostName(), id FROM distributed_connect ORDER BY id LIMIT 1 SETTINGS prefer_localhost_replica = 0, connect_timeout_with_failover_ms=5000, async_query_sending_for_remote=1, max_threads=1" ) check_changing_replica_events(2) - check_if_query_sending_was_suspended(2) + check_if_query_sending_was_suspended() NODES["node"].query("DROP TABLE distributed_connect") @@ -406,7 +406,7 @@ def test_async_query_sending(started_cluster): NODES["node"].query("DROP TABLE IF EXISTS tmp") NODES["node"].query( "CREATE 
TEMPORARY TABLE tmp (number UInt64, s String) " - "as select number, randomString(number % 1000) from numbers(1000000)" + "as select number, randomString(number % 1000) from numbers(10000000)" ) NODES["node"].query( @@ -419,6 +419,6 @@ def test_async_query_sending(started_cluster): "SELECT hostName(), id FROM distributed_query_sending ORDER BY id LIMIT 1 SETTINGS" " prefer_localhost_replica = 0, async_query_sending_for_remote=1, max_threads = 1" ) - check_if_query_sending_was_suspended(3) + check_if_query_sending_was_suspended() NODES["node"].query("DROP TABLE distributed_query_sending") diff --git a/tests/integration/test_mask_sensitive_info/configs/named_collections.xml b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml index a4b58f6f812..3d294874d68 100644 --- a/tests/integration/test_mask_sensitive_info/configs/named_collections.xml +++ b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml @@ -1,4 +1,5 @@ + 1 diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 92232f7e6a8..2131a76b5be 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -65,14 +65,15 @@ def system_query_log_contains_search_pattern(search_pattern): ) -# Generates a random string. def new_password(len=16): return "".join( random.choice(string.ascii_uppercase + string.digits) for _ in range(len) ) -# Passwords in CREATE/ALTER queries must be hidden in logs. +show_secrets = "SETTINGS format_display_secrets_in_show_and_select" + + def test_create_alter_user(): password = new_password() @@ -95,21 +96,37 @@ def test_create_alter_user(): check_logs( must_contain=[ - "CREATE USER u1 IDENTIFIED WITH sha256_password", - "ALTER USER u1 IDENTIFIED WITH sha256_password", + "CREATE USER u1 IDENTIFIED", + "ALTER USER u1 IDENTIFIED", "CREATE USER u2 IDENTIFIED WITH plaintext_password", ], must_not_contain=[ password, - "IDENTIFIED WITH sha256_password BY", - "IDENTIFIED WITH sha256_hash BY", + "IDENTIFIED BY", + "IDENTIFIED BY", "IDENTIFIED WITH plaintext_password BY", ], ) + assert "BY" in node.query(f"SHOW CREATE USER u1 {show_secrets}=1") + assert "BY" in node.query(f"SHOW CREATE USER u2 {show_secrets}=1") + node.query("DROP USER u1, u2") +def check_secrets_for_tables(tables, table_name_prefix, password): + for i, table in enumerate(tables): + table_name = table_name_prefix + str(i) + if password in table: + assert password in node.query( + f"SHOW CREATE TABLE {table_name} {show_secrets}=1" + ) + assert password in node.query( + f"SELECT create_table_query, engine_full FROM system.tables WHERE name = '{table_name}' " + f"{show_secrets}=1" + ) + + def test_create_table(): password = new_password() @@ -133,21 +150,25 @@ def test_create_table(): for i, table_engine in enumerate(table_engines): node.query(f"CREATE TABLE table{i} (x int) ENGINE = {table_engine}") - assert ( - node.query("SHOW CREATE TABLE table0") - == "CREATE TABLE default.table0\\n(\\n `x` Int32\\n)\\nENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')\n" - ) + for toggle, secret in enumerate(["[HIDDEN]", password]): + assert ( + node.query(f"SHOW CREATE TABLE table0 {show_secrets}={toggle}") + == "CREATE TABLE default.table0\\n(\\n `x` Int32\\n)\\n" + "ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', " + f"\\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')\n" + ) - assert node.query( - "SELECT create_table_query, engine_full FROM 
system.tables WHERE name = 'table0'" - ) == TSV( - [ + assert node.query( + f"SELECT create_table_query, engine_full FROM system.tables WHERE name = 'table0' {show_secrets}={toggle}" + ) == TSV( [ - "CREATE TABLE default.table0 (`x` Int32) ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", - "MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", - ], - ] - ) + [ + "CREATE TABLE default.table0 (`x` Int32) ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', " + f"\\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", + f"MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", + ], + ] + ) check_logs( must_contain=[ @@ -169,7 +190,9 @@ def test_create_table(): must_not_contain=[password], ) - for i in range(0, len(table_engines)): + check_secrets_for_tables(table_engines, "table", password) + + for i in range(len(table_engines)): node.query(f"DROP TABLE table{i}") @@ -198,7 +221,7 @@ def test_create_database(): must_not_contain=[password], ) - for i in range(0, len(database_engines)): + for i in range(len(database_engines)): node.query(f"DROP DATABASE IF EXISTS database{i}") @@ -241,21 +264,26 @@ def test_table_functions(): for i, table_function in enumerate(table_functions): node.query(f"CREATE TABLE tablefunc{i} (x int) AS {table_function}") - assert ( - node.query("SHOW CREATE TABLE tablefunc0") - == "CREATE TABLE default.tablefunc0\\n(\\n `x` Int32\\n) AS mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')\n" - ) + for toggle, secret in enumerate(["[HIDDEN]", password]): + assert ( + node.query(f"SHOW CREATE TABLE tablefunc0 {show_secrets}={toggle}") + == "CREATE TABLE default.tablefunc0\\n(\\n `x` Int32\\n) AS " + "mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', " + f"\\'mysql_user\\', \\'{secret}\\')\n" + ) - assert node.query( - "SELECT create_table_query, engine_full FROM system.tables WHERE name = 'tablefunc0'" - ) == TSV( - [ + assert node.query( + "SELECT create_table_query, engine_full FROM system.tables WHERE name = 'tablefunc0' " + f"{show_secrets}={toggle}" + ) == TSV( [ - "CREATE TABLE default.tablefunc0 (`x` Int32) AS mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')", - "", - ], - ] - ) + [ + "CREATE TABLE default.tablefunc0 (`x` Int32) AS mysql(\\'mysql57:3306\\', " + f"\\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", + "", + ], + ] + ) check_logs( must_contain=[ @@ -293,7 +321,9 @@ def test_table_functions(): must_not_contain=[password], ) - for i in range(0, len(table_functions)): + check_secrets_for_tables(table_functions, "tablefunc", password) + + for i in range(len(table_functions)): node.query(f"DROP TABLE tablefunc{i}") @@ -369,15 +399,18 @@ def test_create_dictionary(): f"LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())" ) - assert ( - node.query("SHOW CREATE TABLE dict1") - == "CREATE DICTIONARY default.dict1\\n(\\n `n` int DEFAULT 0,\\n `m` int DEFAULT 1\\n)\\nPRIMARY KEY n\\nSOURCE(CLICKHOUSE(HOST \\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'[HIDDEN]\\' DB \\'default\\'))\\nLIFETIME(MIN 0 MAX 10)\\nLAYOUT(FLAT())\n" - ) + for toggle, secret in enumerate(["[HIDDEN]", password]): + assert ( + node.query(f"SHOW CREATE TABLE dict1 {show_secrets}={toggle}") + == f"CREATE DICTIONARY default.dict1\\n(\\n `n` int DEFAULT 0,\\n `m` int DEFAULT 1\\n)\\nPRIMARY KEY n\\nSOURCE(CLICKHOUSE(HOST 
\\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'{secret}\\' DB \\'default\\'))\\nLIFETIME(MIN 0 MAX 10)\\nLAYOUT(FLAT())\n" + ) - assert ( - node.query("SELECT create_table_query FROM system.tables WHERE name = 'dict1'") - == "CREATE DICTIONARY default.dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST \\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'[HIDDEN]\\' DB \\'default\\')) LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())\n" - ) + assert ( + node.query( + f"SELECT create_table_query FROM system.tables WHERE name = 'dict1' {show_secrets}={toggle}" + ) + == f"CREATE DICTIONARY default.dict1 (`n` int DEFAULT 0, `m` int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST \\'localhost\\' PORT 9000 USER \\'user1\\' TABLE \\'test\\' PASSWORD \\'{secret}\\' DB \\'default\\')) LIFETIME(MIN 0 MAX 10) LAYOUT(FLAT())\n" + ) check_logs( must_contain=[ @@ -448,4 +481,4 @@ def test_on_cluster(): "%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')" ) - node.query(f"DROP TABLE table_oncl") + node.query("DROP TABLE table_oncl") diff --git a/tests/integration/test_max_rows_to_read_leaf_with_view/__init__.py b/tests/integration/test_max_rows_to_read_leaf_with_view/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_max_rows_to_read_leaf_with_view/configs/remote_servers.xml b/tests/integration/test_max_rows_to_read_leaf_with_view/configs/remote_servers.xml new file mode 100644 index 00000000000..9ce90edb727 --- /dev/null +++ b/tests/integration/test_max_rows_to_read_leaf_with_view/configs/remote_servers.xml @@ -0,0 +1,18 @@ + <clickhouse> + <remote_servers> + <two_shards> + <shard> + <replica> + <host>node1</host> + <port>9000</port> + </replica> + </shard> + <shard> + <replica> + <host>node2</host> + <port>9000</port> + </replica> + </shard> + </two_shards> + </remote_servers> + </clickhouse> diff --git a/tests/integration/test_max_rows_to_read_leaf_with_view/test.py b/tests/integration/test_max_rows_to_read_leaf_with_view/test.py new file mode 100755 index 00000000000..6957534ce0d --- /dev/null +++ b/tests/integration/test_max_rows_to_read_leaf_with_view/test.py @@ -0,0 +1,76 @@ +from contextlib import contextmanager + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, +) + +node2 = cluster.add_instance( + "node2", + main_configs=["configs/remote_servers.xml"], + with_zookeeper=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node1, node2): + node.query( + f""" + CREATE TABLE local_table(id UInt32, d DateTime) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/max_rows_read_leaf', '{node}') PARTITION BY toYYYYMM(d) ORDER BY d; + + CREATE TABLE distributed_table(id UInt32, d DateTime) ENGINE = Distributed(two_shards, default, local_table); + + CREATE OR REPLACE VIEW test_view AS select id from distributed_table; +""" + ) + node1.query( + "INSERT INTO local_table (id) select * from system.numbers limit 200" + ) + node2.query( + "INSERT INTO local_table (id) select * from system.numbers limit 200" + ) + + yield cluster + + finally: + cluster.shutdown() + + +def test_max_rows_to_read_leaf_via_view(started_cluster): + """ + Asserts the expected behaviour that we should be able to select + the total number of rows (400, i.e. 200 from each shard) from a + view that selects from a distributed table.
+ """ + assert ( + node1.query( + "SELECT count() from test_view SETTINGS max_rows_to_read_leaf=200" + ).rstrip() + == "400" + ) + with pytest.raises( + QueryRuntimeException, match="controlled by 'max_rows_to_read_leaf'" + ): + # insert some more data and ensure we get a legitimate failure + node2.query( + "INSERT INTO local_table (id) select * from system.numbers limit 10" + ) + node2.query("SELECT count() from test_view SETTINGS max_rows_to_read_leaf=200") + + +if __name__ == "__main__": + with contextmanager(started_cluster)() as cluster: + for name, instance in list(cluster.instances.items()): + print(name, instance.ip_address) + input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index d1b7b64b56f..bcb62c3181d 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -461,7 +461,7 @@ def test_move_replace_partition_to_another_table(cluster): == "(512)" ) - azure_query(node, f"DROP TABLE {table_clone_name} NO DELAY") + azure_query(node, f"DROP TABLE {table_clone_name} SYNC") assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") @@ -470,7 +470,7 @@ def test_move_replace_partition_to_another_table(cluster): azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE") - azure_query(node, f"DROP TABLE {TABLE_NAME} NO DELAY") + azure_query(node, f"DROP TABLE {TABLE_NAME} SYNC") def test_freeze_unfreeze(cluster): diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 782237539fa..c79986c34f0 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -224,14 +224,22 @@ def test_attach_detach_partition(cluster): wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + cluster, + FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION, ) node.query("ALTER TABLE hdfs_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)" hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + assert ( + len(hdfs_objects) + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION + ) node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" @@ -355,7 +363,14 @@ def test_move_replace_partition_to_another_table(cluster): # Number of objects in HDFS should be unchanged. hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + for obj in hdfs_objects: + print("Object in HDFS after move", obj) + wait_for_delete_hdfs_objects( + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, + ) # Add new partitions to source table, but with different values and replace them from copied table. 
node.query( @@ -370,7 +385,15 @@ assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)" hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 + for obj in hdfs_objects: + print("Object in HDFS after insert", obj) + + wait_for_delete_hdfs_objects( + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 6 + - FILES_OVERHEAD_METADATA_VERSION * 2, + ) node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone") node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-05' FROM hdfs_clone") @@ -381,13 +404,25 @@ # Wait for outdated partitions deletion. wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) - node.query("DROP TABLE hdfs_clone NO DELAY") + node.query("DROP TABLE hdfs_clone SYNC") assert node.query("SELECT sum(id) FROM hdfs_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)" # Data should remain in hdfs hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + + for obj in hdfs_objects: + print("Object in HDFS after drop", obj) + + wait_for_delete_hdfs_objects( + cluster, + FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, + ) diff --git a/tests/integration/test_merge_tree_load_parts/test.py b/tests/integration/test_merge_tree_load_parts/test.py index dfbe00c8e28..049dd516647 100644 --- a/tests/integration/test_merge_tree_load_parts/test.py +++ b/tests/integration/test_merge_tree_load_parts/test.py @@ -6,12 +6,14 @@ from helpers.corrupt_part_data_on_disk import corrupt_part_data_on_disk cluster = helpers.cluster.ClickHouseCluster(__file__) + node1 = cluster.add_instance( "node1", main_configs=["configs/fast_background_pool.xml"], with_zookeeper=True, stay_alive=True, ) + node2 = cluster.add_instance( "node2", main_configs=["configs/fast_background_pool.xml"], @@ -19,6 +21,12 @@ node2 = cluster.add_instance( stay_alive=True, ) +node3 = cluster.add_instance( + "node3", + with_zookeeper=True, + stay_alive=True, +) + @pytest.fixture(scope="module") def started_cluster(): @@ -194,3 +202,54 @@ def test_merge_tree_load_parts_corrupted(started_cluster): ) == "111\t1\n222\t1\n333\t1\n" ) + + +def test_merge_tree_load_parts_filesystem_error(started_cluster): + if node3.is_built_with_sanitizer() or node3.is_debug_build(): + pytest.skip( + "Skip with debug build and sanitizers. \ + This test intentionally triggers LOGICAL_ERROR which leads to crash with those builds" + ) + + node3.query( + """ + CREATE TABLE mt_load_parts (id UInt32) + ENGINE = MergeTree ORDER BY id + SETTINGS index_granularity_bytes = 0""" + ) + + node3.query("SYSTEM STOP MERGES mt_load_parts") + + for i in range(2): + node3.query(f"INSERT INTO mt_load_parts VALUES ({i})") + + # We want to somehow check that an exception thrown on part creation is handled during part loading. + # It can be a filesystem exception triggered at initialization of part storage, but it is hard + # to trigger it because it should be an exception on stat/listDirectory.
+ # The easiest way to trigger such an exception is to use chmod, but the clickhouse server + # runs as the root user in integration tests, so that won't work. So let's do some + # stupid things: create a table without adaptive granularity and change the mark + # extensions of data files in the part to make clickhouse think that it's a compact part, which + # cannot be created in such a table. This will trigger a LOGICAL_ERROR on part creation. + + def corrupt_part(table, part_name): + part_path = node3.query( + "SELECT path FROM system.parts WHERE table = '{}' and name = '{}'".format( + table, part_name + ) + ).strip() + + node3.exec_in_container( + ["bash", "-c", f"mv {part_path}id.mrk {part_path}id.mrk3"], privileged=True + ) + + corrupt_part("mt_load_parts", "all_1_1_0") + node3.restart_clickhouse(kill=True) + + assert node3.query("SELECT * FROM mt_load_parts") == "1\n" + assert ( + node3.query( + "SELECT name FROM system.detached_parts WHERE table = 'mt_load_parts'" + ) + == "broken-on-start_all_1_1_0\n" + ) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index c2e00dc0cb8..e4bfffd70cc 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -101,44 +101,45 @@ def run_s3_mocks(cluster): ) -def list_objects(cluster, path="data/"): +def list_objects(cluster, path="data/", hint="list_objects"): minio = cluster.minio_client objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) - logging.info(f"list_objects ({len(objects)}): {[x.object_name for x in objects]}") + logging.info(f"{hint} ({len(objects)}): {[x.object_name for x in objects]}") return objects def wait_for_delete_s3_objects(cluster, expected, timeout=30): - minio = cluster.minio_client while timeout > 0: - if ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == expected - ): + if len(list_objects(cluster, "data/")) == expected: return timeout -= 1 time.sleep(1) - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == expected - ) + assert len(list_objects(cluster, "data/")) == expected -@pytest.fixture(autouse=True) -@pytest.mark.parametrize("node_name", ["node"]) -def drop_table(cluster, node_name): - yield - node = cluster.instances[node_name] +def remove_all_s3_objects(cluster): minio = cluster.minio_client + for obj in list_objects(cluster, "data/"): + minio.remove_object(cluster.minio_bucket, obj.object_name) - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") +@pytest.fixture(autouse=True, scope="function") +def clear_minio(cluster): try: - wait_for_delete_s3_objects(cluster, 0) - finally: + # CH does some writes to S3 at startup. For example, the file data/clickhouse_access_check_{server_uuid}. + # Set the timeout to 10 sec here in order to resolve the race with the existence of that file.
+ wait_for_delete_s3_objects(cluster, 0, timeout=10) + except: # Remove extra objects to prevent tests cascade failing - for obj in list_objects(cluster, "data/"): - minio.remove_object(cluster.minio_bucket, obj.object_name) + remove_all_s3_objects(cluster) + + yield + + +def check_no_objects_after_drop(cluster, table_name="s3_test", node_name="node"): + node = cluster.instances[node_name] + node.query(f"DROP TABLE IF EXISTS {table_name} SYNC") + wait_for_delete_s3_objects(cluster, 0, timeout=0) @pytest.mark.parametrize( @@ -158,10 +159,7 @@ def test_simple_insert_select( values1 = generate_values("2020-01-03", 4096) node.query("INSERT INTO s3_test VALUES {}".format(values1)) assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1 - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + files_per_part - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + files_per_part values2 = generate_values("2020-01-04", 4096) node.query("INSERT INTO s3_test VALUES {}".format(values2)) @@ -169,15 +167,14 @@ def test_simple_insert_select( node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values") == values1 + "," + values2 ) - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + files_per_part * 2 - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + files_per_part * 2 assert ( node.query("SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)" ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("merge_vertical,node_name", [(True, "node"), (False, "node")]) def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): @@ -188,7 +185,6 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): node = cluster.instances[node_name] create_table(node, "s3_test", **settings) - minio = cluster.minio_client node.query("SYSTEM STOP MERGES s3_test") node.query( @@ -214,7 +210,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD ) @@ -242,6 +238,8 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_alter_table_columns(cluster, node_name): @@ -287,12 +285,13 @@ def test_alter_table_columns(cluster, node_name): cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + 2 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_attach_detach_partition(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -312,14 +311,18 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'") 
assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") @@ -337,7 +340,9 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 1 + - FILES_OVERHEAD_METADATA_VERSION ) node.query( "ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", @@ -349,12 +354,13 @@ def test_attach_detach_partition(cluster, node_name): == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 0 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_move_partition_to_another_disk(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -364,30 +370,31 @@ def test_move_partition_to_another_disk(cluster, node_name): ) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 's3'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_table_manipulations(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -399,9 +406,10 @@ def test_table_manipulations(cluster, node_name): node.query("RENAME TABLE s3_test TO s3_renamed") assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) + node.query("RENAME TABLE s3_renamed TO s3_test") assert node.query("CHECK TABLE s3_test FORMAT Values") == "(1)" @@ -410,7 +418,7 @@ def test_table_manipulations(cluster, node_name): node.query("ATTACH TABLE s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -418,17 +426,15 @@ def test_table_manipulations(cluster, node_name): wait_for_delete_empty_parts(node, "s3_test") wait_for_delete_inactive_parts(node, 
"s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) def test_move_replace_partition_to_another_table(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -444,11 +450,11 @@ def test_move_replace_partition_to_another_table(cluster, node_name): ) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" + assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/", "Objects at start")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) - create_table(node, "s3_clone") node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-03' TO TABLE s3_clone") @@ -457,10 +463,14 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)" + + list_objects(cluster, "data/", "Object after move partition") # Number of objects in S3 should be unchanged. - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + wait_for_delete_s3_objects( + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) # Add new partitions to source table, but with different values and replace them from copied table. @@ -472,9 +482,13 @@ def test_move_replace_partition_to_another_table(cluster, node_name): ) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 + + list_objects(cluster, "data/", "Object after insert") + wait_for_delete_s3_objects( + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 6 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone") @@ -486,39 +500,48 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Wait for outdated partitions deletion. 
wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) - node.query("DROP TABLE s3_clone NO DELAY") + node.query("DROP TABLE s3_clone SYNC") assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" - # Data should remain in S3 - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + + list_objects(cluster, "data/", "Object after drop") + wait_for_delete_s3_objects( + cluster, + FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("ALTER TABLE s3_test FREEZE") # Number S3 objects should be unchanged. - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + list_objects(cluster, "data/", "Object after freeze") + wait_for_delete_s3_objects( + cluster, + FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) - node.query("DROP TABLE s3_test NO DELAY") + node.query("DROP TABLE s3_test SYNC") # Backup data should remain in S3. - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4) + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4 + ) - for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): - minio.remove_object(cluster.minio_bucket, obj.object_name) + remove_all_s3_objects(cluster) @pytest.mark.parametrize("node_name", ["node"]) def test_freeze_unfreeze(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -533,8 +556,9 @@ def test_freeze_unfreeze(cluster, node_name): wait_for_delete_empty_parts(node, "s3_test") wait_for_delete_inactive_parts(node, "s3_test") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + len(list_objects(cluster, "data/")) + == FILES_OVERHEAD + + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze single partition from backup1. @@ -544,13 +568,10 @@ def test_freeze_unfreeze(cluster, node_name): # Unfreeze all partitions from backup2. node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'") + # Data should be removed from S3. wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) - # Data should be removed from S3. 
- assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -558,7 +579,6 @@ def test_freeze_system_unfreeze(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") create_table(node, "s3_test_removed") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096)) @@ -572,22 +592,20 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("TRUNCATE TABLE s3_test") wait_for_delete_empty_parts(node, "s3_test") wait_for_delete_inactive_parts(node, "s3_test") - node.query("DROP TABLE s3_test_removed NO DELAY") + node.query("DROP TABLE s3_test_removed SYNC") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + len(list_objects(cluster, "data/")) + == FILES_OVERHEAD + + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze all data from backup3. node.query("SYSTEM UNFREEZE WITH NAME 'backup3'") + # Data should be removed from S3. wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) - # Data should be removed from S3. - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -633,6 +651,8 @@ def test_s3_disk_apply_new_settings(cluster, node_name): # There should be 3 times more S3 requests because multi-part upload mode uses 3 requests to upload object. assert get_s3_requests() - s3_requests_before == s3_requests_to_write_partition * 3 + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_s3_no_delete_objects(cluster, node_name): @@ -641,6 +661,7 @@ def test_s3_no_delete_objects(cluster, node_name): node, "s3_test_no_delete_objects", storage_policy="no_delete_objects_s3" ) node.query("DROP TABLE s3_test_no_delete_objects SYNC") + remove_all_s3_objects(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -655,32 +676,33 @@ def test_s3_disk_reads_on_unstable_connection(cluster, node_name): assert node.query("SELECT sum(id) FROM s3_test").splitlines() == [ "40499995500000" ] + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) def test_lazy_seek_optimization_for_async_read(cluster, node_name): node = cluster.instances[node_name] - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") node.query( "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3';" ) + node.query("SYSTEM STOP MERGES s3_test") node.query( "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000000" ) node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10") - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") - minio = cluster.minio_client - for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): - minio.remove_object(cluster.minio_bucket, obj.object_name) + + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node_with_limited_disk"]) def test_cache_with_full_disk_space(cluster, node_name): node = cluster.instances[node_name] - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") 
node.query( "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY value SETTINGS storage_policy='s3_with_cache_and_jbod';" ) + node.query("SYSTEM STOP MERGES s3_test") node.query( "INSERT INTO s3_test SELECT number, toString(number) FROM numbers(100000000)" ) @@ -699,7 +721,7 @@ def test_cache_with_full_disk_space(cluster, node_name): assert node.contains_in_log( "Insert into cache is skipped due to insufficient disk space" ) - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + check_no_objects_after_drop(cluster, node_name=node_name) @pytest.mark.parametrize("node_name", ["node"]) @@ -724,13 +746,14 @@ def test_store_cleanup_disk_s3(cluster, node_name): "CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';" ) node.query("INSERT INTO s3_test SELECT 1") + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) def test_cache_setting_compatibility(cluster, node_name): node = cluster.instances[node_name] - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") node.query( "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r', compress_marks=false, compress_primary_key=false;" @@ -800,3 +823,5 @@ def test_cache_setting_compatibility(cluster, node_name): node.query("SELECT * FROM s3_test FORMAT Null") assert not node.contains_in_log("No such file or directory: Cache info:") + + check_no_objects_after_drop(cluster) diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml index 976933b2d21..74af657c783 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml @@ -11,6 +11,7 @@ true 0 + 20000 s3 @@ -20,6 +21,7 @@ minio123 true + 20000 s3 @@ -32,6 +34,7 @@ 1 1 + 20000 diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 3cc2b17dce2..05aeeff2ec1 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -85,7 +85,7 @@ def cluster(): def drop_table(cluster): yield node = cluster.instances["node"] - node.query("DROP TABLE IF EXISTS s3_failover_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_failover_test SYNC") # S3 request will be failed for an appropriate part file write. diff --git a/tests/integration/test_merge_tree_s3_with_cache/test.py b/tests/integration/test_merge_tree_s3_with_cache/test.py index 89b5a400b1b..067ed4f9679 100644 --- a/tests/integration/test_merge_tree_s3_with_cache/test.py +++ b/tests/integration/test_merge_tree_s3_with_cache/test.py @@ -77,7 +77,7 @@ def test_write_is_cached(cluster, min_rows_for_wide_part, read_requests): # stat = get_query_stat(node, select_query) # assert stat["S3ReadRequestsCount"] == read_requests # Only .bin files should be accessed from S3. 
- node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") @pytest.mark.parametrize( @@ -126,4 +126,4 @@ def test_read_after_cache_is_wiped( # stat = get_query_stat(node, select_query) # assert stat["S3ReadRequestsCount"] == bin_files - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 5a972b58f99..93f03f4420e 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -70,7 +70,7 @@ def partition_complex_assert_columns_txt(): ) -def partition_complex_assert_checksums(): +def partition_complex_assert_checksums(after_detach=False): # Do not check increment.txt - it can be changed by other tests with FREEZE cmd = [ "bash", @@ -80,36 +80,67 @@ def partition_complex_assert_checksums(): " | sed 's shadow/[0-9]*/data/[a-z0-9_-]*/ shadow/1/data/test/ g' | sort | uniq", ] - checksums = ( - "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" - "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" - "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" - "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" - "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" - "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" - "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" - "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" - "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" - "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" - "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" - "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" - "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" - "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" - "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" - "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" - "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" - "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" - 
"cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700102_2_2_0/metadata_version.txt\n" - "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700201_1_1_0/metadata_version.txt\n" - "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" - "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" - ) + # no metadata version + if after_detach: + checksums = ( + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" + "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" + "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" + "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" + "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" + "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" + "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" + "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" + "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" + "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" + "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" + "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" + "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" + ) + else: + checksums = ( + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" + "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" + 
"25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" + "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" + "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" + "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" + "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" + "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" + "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" + "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" + "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" + "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700102_2_2_0/metadata_version.txt\n" + "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700201_1_1_0/metadata_version.txt\n" + "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" + "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" + ) assert TSV(instance.exec_in_container(cmd).replace(" ", "\t")) == TSV(checksums) @@ -134,7 +165,7 @@ def test_partition_complex(partition_table_complex): q("ALTER TABLE test.partition_complex FREEZE") - partition_complex_assert_checksums() + partition_complex_assert_checksums(True) q("ALTER TABLE test.partition_complex DETACH PARTITION 197001") q("ALTER TABLE test.partition_complex ATTACH PARTITION 197001") @@ -144,7 +175,7 @@ def test_partition_complex(partition_table_complex): q("ALTER TABLE test.partition_complex MODIFY COLUMN v1 Int8") # Check the backup hasn't changed - partition_complex_assert_checksums() + partition_complex_assert_checksums(True) q("OPTIMIZE TABLE test.partition_complex") diff --git a/tests/integration/test_password_constraints/configs/default_password_type.xml b/tests/integration/test_password_constraints/configs/default_password_type.xml new file mode 100644 index 00000000000..4b23ea31df0 --- /dev/null +++ 
b/tests/integration/test_password_constraints/configs/default_password_type.xml @@ -0,0 +1,3 @@ +<clickhouse> + <default_password_type>double_sha1_password</default_password_type> +</clickhouse> diff --git a/tests/integration/test_password_constraints/test.py b/tests/integration/test_password_constraints/test.py index 9cdff51caa1..94e10ed5f9e 100644 --- a/tests/integration/test_password_constraints/test.py +++ b/tests/integration/test_password_constraints/test.py @@ -5,6 +5,9 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance("node", main_configs=["configs/complexity_rules.xml"]) +node2 = cluster.add_instance( + "node2", main_configs=["configs/default_password_type.xml"] +) @pytest.fixture(scope="module") @@ -39,3 +42,10 @@ def test_complexity_rules(start_cluster): node.query("CREATE USER u_5 IDENTIFIED WITH plaintext_password BY 'aA!000000000'") node.query("DROP USER u_5") + + +def test_default_password_type(start_cluster): + node2.query("CREATE USER u1 IDENTIFIED BY 'pwd'") + + required_type = "double_sha1_password" + assert required_type in node2.query("SHOW CREATE USER u1") diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index de6c9ad2cf9..63e85afb1d4 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -327,6 +327,17 @@ def test_predefined_connection_configuration(started_cluster): node1.query(f"SELECT count() FROM postgres_database.test_table").rstrip() == "100" ) + node1.query( + """ + DROP DATABASE postgres_database; + CREATE DATABASE postgres_database ENGINE = PostgreSQL(postgres1, use_tables_cache=1); + """ + ) + assert ( + node1.query(f"SELECT count() FROM postgres_database.test_table").rstrip() + == "100" + ) + assert node1.contains_in_log("Cached table `test_table`") node1.query("DROP DATABASE postgres_database") cursor.execute(f"DROP TABLE test_table ") diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 1ba278b357e..90d19e9532c 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -624,7 +624,7 @@ def test_table_override(started_cluster): time.sleep(5) query = f"select * from {materialized_database}.{table_name} order by key" expected = instance.query(f"select * from {table_name} order by key") - instance.query(f"drop table {table_name} no delay") + instance.query(f"drop table {table_name} sync") assert_eq_with_retry(instance, query, expected) diff --git a/tests/integration/test_rename_column/test.py b/tests/integration/test_rename_column/test.py index 33343da8f6d..6bab0a28259 100644 --- a/tests/integration/test_rename_column/test.py +++ b/tests/integration/test_rename_column/test.py @@ -40,7 +40,7 @@ def started_cluster(): def drop_table(nodes, table_name): for node in nodes: - node.query("DROP TABLE IF EXISTS {} NO DELAY".format(table_name)) + node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) def create_table( diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py index aea41fc0684..05d7bbb7282 100644 --- a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py @@ -42,7 +42,7 @@ def copy_keys(instance, keys_file_name):
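Note on the password-constraints hunks above: the new config presumably sets the server-level default_password_type setting (the name is taken from the config file name), so that CREATE USER ... IDENTIFIED BY '<password>' with no explicit authentication type is stored as a double_sha1_password credential, which is exactly what test_default_password_type checks via SHOW CREATE USER. A minimal sketch of that behaviour, reusing the node2 instance defined in the same test file; the user name and helper name below are illustrative only:

    def check_default_password_type(node):
        # With double_sha1_password configured as the default password type,
        # a bare IDENTIFIED BY clause is stored as a double-SHA1 hash rather
        # than plaintext, and SHOW CREATE USER reports that authentication type.
        node.query("CREATE USER u_probe IDENTIFIED BY 'pwd'")
        try:
            assert "double_sha1_password" in node.query("SHOW CREATE USER u_probe")
        finally:
            node.query("DROP USER IF EXISTS u_probe")

Called as check_default_password_type(node2) inside a test, this exercises the same path as test_default_password_type above.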
def create_table(): - node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' SYNC") node1.query( """ CREATE TABLE tbl ON CLUSTER 'cluster' ( diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/test.py b/tests/integration/test_replicated_merge_tree_encryption_codec/test.py index ffe9c056f1e..a50f8341ee7 100644 --- a/tests/integration/test_replicated_merge_tree_encryption_codec/test.py +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/test.py @@ -40,7 +40,7 @@ def copy_keys(instance, keys_file_name): def create_table(): - node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' SYNC") node1.query( """ CREATE TABLE tbl ON CLUSTER 'cluster' ( diff --git a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py index 1f81421f93c..bd1c890950a 100644 --- a/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_hdfs_zero_copy/test.py @@ -111,8 +111,8 @@ def test_hdfs_zero_copy_replication_insert(cluster): SHARDS * FILES_OVERHEAD_PER_TABLE + FILES_OVERHEAD_PER_PART_COMPACT, ) finally: - node1.query("DROP TABLE IF EXISTS hdfs_test NO DELAY") - node2.query("DROP TABLE IF EXISTS hdfs_test NO DELAY") + node1.query("DROP TABLE IF EXISTS hdfs_test SYNC") + node2.query("DROP TABLE IF EXISTS hdfs_test SYNC") @pytest.mark.parametrize( @@ -173,7 +173,7 @@ def test_hdfs_zero_copy_replication_single_move(cluster, storage_policy, init_ob == "(10),(11)" ) finally: - node1.query("DROP TABLE IF EXISTS single_node_move_test NO DELAY") + node1.query("DROP TABLE IF EXISTS single_node_move_test SYNC") @pytest.mark.parametrize( @@ -244,8 +244,8 @@ def test_hdfs_zero_copy_replication_move(cluster, storage_policy, init_objects): cluster, "/clickhouse1", init_objects + FILES_OVERHEAD_PER_PART_COMPACT ) finally: - node1.query("DROP TABLE IF EXISTS move_test NO DELAY") - node2.query("DROP TABLE IF EXISTS move_test NO DELAY") + node1.query("DROP TABLE IF EXISTS move_test SYNC") + node2.query("DROP TABLE IF EXISTS move_test SYNC") @pytest.mark.parametrize(("storage_policy"), ["hybrid", "tiered", "tiered_copy"]) @@ -282,8 +282,8 @@ def test_hdfs_zero_copy_with_ttl_move(cluster, storage_policy): == "(10),(11)" ) finally: - node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") + node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") def test_hdfs_zero_copy_with_ttl_delete(cluster): @@ -318,5 +318,5 @@ def test_hdfs_zero_copy_with_ttl_delete(cluster): == "(11)" ) finally: - node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") + node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") diff --git a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py index cf76d47157a..5a514be58dd 100644 --- a/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py +++ b/tests/integration/test_replicated_merge_tree_with_auxiliary_zookeepers/test.py @@ -37,7 +37,7 @@ def started_cluster(): def drop_table(nodes, table_name): for node in nodes: 
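Most hunks in this stretch of the diff make one mechanical substitution: the deprecated NO DELAY modifier on DROP TABLE / DROP DATABASE becomes its synonym SYNC, so the drop only returns once the data is actually removed instead of being deferred (one of the shell tests further down notes this matters for Atomic databases). A minimal sketch of the resulting helper pattern, with illustrative names; node.query() is the integration-test helper used throughout these files:

    def drop_tables_sync(nodes, table_name):
        # SYNC is the current spelling of the deprecated NO DELAY modifier:
        # the query returns only after the table data is actually gone, so
        # later assertions do not race against a deferred background drop.
        for node in nodes:
            node.query(f"DROP TABLE IF EXISTS {table_name} SYNC")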
- node.query("DROP TABLE IF EXISTS {} NO DELAY".format(table_name)) + node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) # Create table with default zookeeper. diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 241b90cac3f..237a81da0f5 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -247,9 +247,10 @@ def test_skip_unavailable_shards(started_cluster): assert result == "10\n" -def test_unskip_unavailable_shards(started_cluster): +def test_unset_skip_unavailable_shards(started_cluster): + # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. node = started_cluster.instances["s0_0_0"] - error = node.query_and_get_error( + result = node.query( """ SELECT count(*) from s3Cluster( 'cluster_non_existent_port', @@ -258,7 +259,7 @@ def test_unskip_unavailable_shards(started_cluster): """ ) - assert "NETWORK_ERROR" in error + assert result == "10\n" def test_distributed_insert_select_with_replicated(started_cluster): diff --git a/tests/integration/test_s3_with_https/test.py b/tests/integration/test_s3_with_https/test.py index 46e281251a0..6db5b7da930 100644 --- a/tests/integration/test_s3_with_https/test.py +++ b/tests/integration/test_s3_with_https/test.py @@ -56,7 +56,7 @@ def test_s3_with_https(cluster, policy): == "(0,'data'),(1,'data')" ) - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") if policy.find("proxy") != -1: check_proxy_logs(cluster, "proxy1") diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 1af040c3c30..e5624d4e056 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -72,7 +72,7 @@ def test_s3_with_proxy_list(cluster, policy): == "(0,'data'),(1,'data')" ) - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node.query("DROP TABLE IF EXISTS s3_test SYNC") for proxy in ["proxy1", "proxy2"]: check_proxy_logs(cluster, proxy, ["PUT", "GET"]) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index 1c559312105..100f062de2f 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -149,8 +149,8 @@ def test_s3_zero_copy_replication(started_cluster, policy): # Based on version 21.x - after cleanup - only one merged part wait_for_large_objects_count(cluster, 1, timeout=60) - node1.query("DROP TABLE IF EXISTS s3_test NO DELAY") - node2.query("DROP TABLE IF EXISTS s3_test NO DELAY") + node1.query("DROP TABLE IF EXISTS s3_test SYNC") + node2.query("DROP TABLE IF EXISTS s3_test SYNC") @pytest.mark.skip(reason="Test is flaky (and never was stable)") @@ -239,8 +239,8 @@ def test_s3_zero_copy_on_hybrid_storage(started_cluster): == "(0,'data'),(1,'data')" ) - node1.query("DROP TABLE IF EXISTS hybrid_test NO DELAY") - node2.query("DROP TABLE IF EXISTS hybrid_test NO DELAY") + node1.query("DROP TABLE IF EXISTS hybrid_test SYNC") + node2.query("DROP TABLE IF EXISTS hybrid_test SYNC") def insert_data_time(node, table, number_of_mb, time, start=0): @@ -275,8 +275,8 @@ def test_s3_zero_copy_with_ttl_move( node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") + 
node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") for i in range(iterations): node1.query( @@ -325,8 +325,8 @@ def test_s3_zero_copy_with_ttl_move( == "(10),(11)" ) - node1.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_move_test NO DELAY") + node1.query("DROP TABLE IF EXISTS ttl_move_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_move_test SYNC") @pytest.mark.parametrize( @@ -340,8 +340,8 @@ def test_s3_zero_copy_with_ttl_delete(started_cluster, large_data, iterations): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") + node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") for i in range(iterations): node1.query( @@ -398,8 +398,8 @@ def test_s3_zero_copy_with_ttl_delete(started_cluster, large_data, iterations): == "(11)" ) - node1.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") - node2.query("DROP TABLE IF EXISTS ttl_delete_test NO DELAY") + node1.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") + node2.query("DROP TABLE IF EXISTS ttl_delete_test SYNC") def wait_mutations(node, table, seconds): @@ -438,8 +438,8 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY") - node2.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY") + node1.query("DROP TABLE IF EXISTS unfreeze_test SYNC") + node2.query("DROP TABLE IF EXISTS unfreeze_test SYNC") node1.query( """ @@ -489,8 +489,8 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): check_objects_not_exisis(cluster, objects12) - node1.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY") - node2.query("DROP TABLE IF EXISTS unfreeze_test NO DELAY") + node1.query("DROP TABLE IF EXISTS unfreeze_test SYNC") + node2.query("DROP TABLE IF EXISTS unfreeze_test SYNC") def test_s3_zero_copy_unfreeze_alter(started_cluster): @@ -505,8 +505,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS drop_detached_test NO DELAY") - node2.query("DROP TABLE IF EXISTS drop_detached_test NO DELAY") + node1.query("DROP TABLE IF EXISTS drop_detached_test SYNC") + node2.query("DROP TABLE IF EXISTS drop_detached_test SYNC") node1.query( """ @@ -600,8 +600,8 @@ def test_s3_zero_copy_concurrent_merge(started_cluster): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY") - node2.query("DROP TABLE IF EXISTS concurrent_merge NO DELAY") + node1.query("DROP TABLE IF EXISTS concurrent_merge SYNC") + node2.query("DROP TABLE IF EXISTS concurrent_merge SYNC") for node in (node1, node2): node.query( @@ -647,8 +647,8 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] - node1.query("DROP TABLE IF EXISTS zero_copy_mutation NO DELAY") - node2.query("DROP TABLE IF EXISTS zero_copy_mutation NO DELAY") + node1.query("DROP TABLE IF EXISTS zero_copy_mutation SYNC") + node2.query("DROP TABLE IF EXISTS zero_copy_mutation SYNC") node1.query( """ diff --git a/tests/integration/test_storage_hdfs/test.py 
b/tests/integration/test_storage_hdfs/test.py index d4752d6cf2e..edf5344e887 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -788,6 +788,7 @@ def test_schema_inference_cache(started_cluster): def test_hdfsCluster_skip_unavailable_shards(started_cluster): + # Although skip_unavailable_shards is not set, cluster table functions should always skip unavailable shards. hdfs_api = started_cluster.hdfs_api node = started_cluster.instances["node1"] data = "1\tSerialize\t555.222\n2\tData\t777.333\n" @@ -801,16 +802,18 @@ def test_hdfsCluster_skip_unavailable_shards(started_cluster): ) -def test_hdfsCluster_unskip_unavailable_shards(started_cluster): +def test_hdfsCluster_unset_skip_unavailable_shards(started_cluster): hdfs_api = started_cluster.hdfs_api node = started_cluster.instances["node1"] data = "1\tSerialize\t555.222\n2\tData\t777.333\n" hdfs_api.write_data("/unskip_unavailable_shards", data) - error = node.query_and_get_error( - "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/unskip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" - ) - assert "NETWORK_ERROR" in error + assert ( + node1.query( + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/skip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" + ) + == data + ) if __name__ == "__main__": diff --git a/tests/integration/test_storage_nats/test.py b/tests/integration/test_storage_nats/test.py index 2988c67bf63..1d7e046864b 100644 --- a/tests/integration/test_storage_nats/test.py +++ b/tests/integration/test_storage_nats/test.py @@ -94,7 +94,7 @@ def nats_cluster(): def nats_setup_teardown(): print("NATS is available - running test") yield # run test - instance.query("DROP DATABASE test NO DELAY") + instance.query("DROP DATABASE test SYNC") instance.query("CREATE DATABASE test") diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py index 8666d7ae58c..61a9dd5687b 100644 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -179,7 +179,7 @@ def test_initial_load_from_snapshot(started_cluster): cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") @pytest.mark.timeout(320) @@ -216,7 +216,7 @@ def test_no_connection_at_startup(started_cluster): result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") @pytest.mark.timeout(320) @@ -255,7 +255,7 @@ def test_detach_attach_is_ok(started_cluster): cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") @pytest.mark.timeout(320) @@ -309,7 +309,7 @@ def test_replicating_insert_queries(started_cluster): result = instance.query("SELECT * FROM test.postgresql_replica ORDER BY key;") cursor.execute("DROP TABLE postgresql_replica;") postgresql_replica_check_result(result, True) - instance.query(f"DROP TABLE test.postgresql_replica NO 
DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") @pytest.mark.timeout(320) @@ -667,7 +667,7 @@ def test_virtual_columns(started_cluster): ) print(result) cursor.execute("DROP TABLE postgresql_replica;") - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") def test_abrupt_connection_loss_while_heavy_replication(started_cluster): @@ -702,7 +702,7 @@ def test_abrupt_connection_loss_while_heavy_replication(started_cluster): result = instance.query("SELECT count() FROM test.postgresql_replica") print(result) # Just debug - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") def test_abrupt_server_restart_while_heavy_replication(started_cluster): @@ -720,7 +720,7 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): create_postgres_table(cursor, table_name) instance.query(f"INSERT INTO postgres_database.{table_name} SELECT -1, 1") - instance.query(f"DROP TABLE IF EXISTS test.{table_name} NO DELAY") + instance.query(f"DROP TABLE IF EXISTS test.{table_name} SYNC") create_materialized_table( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, @@ -747,7 +747,7 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): result = instance.query(f"SELECT count() FROM test.{table_name}") print(result) # Just debug - instance.query(f"DROP TABLE test.{table_name} NO DELAY") + instance.query(f"DROP TABLE test.{table_name} SYNC") def test_drop_table_immediately(started_cluster): @@ -771,7 +771,7 @@ def test_drop_table_immediately(started_cluster): ip=started_cluster.postgres_ip, port=started_cluster.postgres_port ) check_tables_are_synchronized("postgresql_replica") - instance.query(f"DROP TABLE test.postgresql_replica NO DELAY") + instance.query(f"DROP TABLE test.postgresql_replica SYNC") if __name__ == "__main__": diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 4ca49e32c55..4e1e28373e3 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -95,7 +95,7 @@ def rabbitmq_cluster(): def rabbitmq_setup_teardown(): print("RabbitMQ is available - running test") yield # run test - instance.query("DROP DATABASE test NO DELAY") + instance.query("DROP DATABASE test SYNC") instance.query("CREATE DATABASE test") @@ -1097,10 +1097,10 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): instance.query( """ - DROP TABLE test.consumer_overload NO DELAY; - DROP TABLE test.view_overload NO DELAY; - DROP TABLE test.rabbitmq_consume NO DELAY; - DROP TABLE test.rabbitmq_overload NO DELAY; + DROP TABLE test.consumer_overload SYNC; + DROP TABLE test.view_overload SYNC; + DROP TABLE test.rabbitmq_consume SYNC; + DROP TABLE test.rabbitmq_overload SYNC; """ ) @@ -2745,7 +2745,7 @@ def test_rabbitmq_drop_mv(rabbitmq_cluster): result = instance.query("SELECT * FROM test.view ORDER BY key") rabbitmq_check_result(result, True) - instance.query("DROP VIEW test.consumer NO DELAY") + instance.query("DROP VIEW test.consumer SYNC") time.sleep(10) for i in range(50, 60): channel.basic_publish( diff --git a/tests/integration/test_system_metrics/test.py b/tests/integration/test_system_metrics/test.py index 439e8b66db1..8539828a8b8 100644 --- a/tests/integration/test_system_metrics/test.py +++ b/tests/integration/test_system_metrics/test.py @@ -157,3 +157,57 @@ def 
test_metrics_storage_buffer_size(start_cluster): ) == "0\n" ) + + +def test_attach_without_zk_incr_readonly_metric(start_cluster): + assert ( + node1.query("SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'") + == "0\n" + ) + + node1.query( + "ATTACH TABLE test.test_no_zk UUID 'a50b7933-59b2-49ce-8db6-59da3c9b4413' (i Int8, d Date) ENGINE = ReplicatedMergeTree('no_zk', 'replica') ORDER BY tuple()" + ) + assert_eq_with_retry( + node1, + "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", + "1\n", + retry_count=300, + sleep_time=1, + ) + + node1.query("DETACH TABLE test.test_no_zk") + assert_eq_with_retry( + node1, + "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", + "0\n", + retry_count=300, + sleep_time=1, + ) + + node1.query("ATTACH TABLE test.test_no_zk") + assert_eq_with_retry( + node1, + "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", + "1\n", + retry_count=300, + sleep_time=1, + ) + + node1.query("SYSTEM RESTORE REPLICA test.test_no_zk") + assert_eq_with_retry( + node1, + "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", + "0\n", + retry_count=300, + sleep_time=1, + ) + + node1.query("DROP TABLE test.test_no_zk") + assert_eq_with_retry( + node1, + "SELECT value FROM system.metrics WHERE metric = 'ReadonlyReplica'", + "0\n", + retry_count=300, + sleep_time=1, + ) diff --git a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml index acf0f765c6c..b527c74e8de 100644 --- a/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml +++ b/tests/integration/test_temporary_data_in_cache/configs/config.d/storage_configuration.xml @@ -2,7 +2,7 @@ - local + local_blob_storage /local_disk/ diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 89824293320..7635d784fef 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -151,7 +151,7 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): get_command("TTL d1 TO DISK 'unknown'", "small_jbod_with_external") ) - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) if alter: node1.query(get_command(None, "small_jbod_with_external")) @@ -161,7 +161,7 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): get_command("TTL d1 TO VOLUME 'unknown'", "small_jbod_with_external") ) - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) if alter: node1.query(get_command(None, "only_jbod2")) @@ -169,7 +169,7 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): with pytest.raises(QueryRuntimeException): node1.query(get_command("TTL d1 TO DISK 'jbod1'", "only_jbod2")) - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) if alter: node1.query(get_command(None, "only_jbod2")) @@ -178,7 +178,7 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): node1.query(get_command("TTL d1 TO VOLUME 'external'", "only_jbod2")) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -253,7 +253,7 @@ def 
test_inserts_to_disk_work(started_cluster, name, engine, positive): finally: try: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) except: pass @@ -330,7 +330,7 @@ def test_moves_work_after_storage_policy_change(started_cluster, name, engine): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -418,7 +418,7 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -489,7 +489,7 @@ def test_moves_to_volume_work(started_cluster, name, engine): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -570,7 +570,7 @@ def test_inserts_to_volume_work(started_cluster, name, engine, positive): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -649,7 +649,7 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"jbod1"} - node1.query("DROP TABLE {} NO DELAY".format(name_temp)) + node1.query("DROP TABLE {} SYNC".format(name_temp)) wait_parts_mover(node1, name) @@ -661,8 +661,8 @@ def test_moves_to_disk_eventually_work(started_cluster, name, engine): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name_temp)) - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name_temp)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) def test_replicated_download_ttl_info(started_cluster): @@ -702,7 +702,7 @@ def test_replicated_download_ttl_info(started_cluster): finally: for node in (node1, node2): try: - node.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node.query("DROP TABLE IF EXISTS {} SYNC".format(name)) except: continue @@ -818,7 +818,7 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -932,8 +932,8 @@ def test_merges_with_full_disk_work(started_cluster, name, engine): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name_temp)) - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name_temp)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -1035,7 +1035,7 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -1150,7 +1150,7 @@ def test_ttls_do_not_work_after_alter(started_cluster, name, engine, positive, b ) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -1255,7 +1255,7 @@ def test_materialize_ttl_in_partition(started_cluster, name, engine): ).strip() == str(len(data)) finally: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + 
node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) @pytest.mark.parametrize( @@ -1378,7 +1378,7 @@ def test_alter_multiple_ttls(started_cluster, name, engine, positive): assert rows_count == 3 finally: - node1.query("DROP TABLE IF EXISTS {name} NO DELAY".format(name=name)) + node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) @pytest.mark.parametrize( @@ -1526,7 +1526,7 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): assert node1.query("SELECT 1") == "1\n" assert node1.query("SELECT COUNT() FROM {}".format(name)) == "150\n" finally: - node1.query("DROP TABLE IF EXISTS {name} NO DELAY".format(name=name)) + node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) @pytest.mark.skip(reason="Flacky test") @@ -1626,7 +1626,7 @@ def test_double_move_while_select(started_cluster, name, positive): ).splitlines() == ["1", "2", "3", "4"] finally: - node1.query("DROP TABLE IF EXISTS {name} NO DELAY".format(name=name)) + node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) @pytest.mark.parametrize( @@ -1745,7 +1745,7 @@ def test_alter_with_merge_work(started_cluster, name, engine, positive): assert node1.query("SELECT count() FROM {name}".format(name=name)) == "6\n" finally: - node1.query("DROP TABLE IF EXISTS {name} NO DELAY".format(name=name)) + node1.query("DROP TABLE IF EXISTS {name} SYNC".format(name=name)) @pytest.mark.parametrize( @@ -1826,7 +1826,7 @@ def test_disabled_ttl_move_on_insert(started_cluster, name, dest_type, engine): finally: try: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) except: pass @@ -1909,7 +1909,7 @@ def test_ttl_move_if_exists(started_cluster, name, dest_type): finally: try: - node1.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) - node2.query("DROP TABLE IF EXISTS {} NO DELAY".format(name)) + node1.query("DROP TABLE IF EXISTS {} SYNC".format(name)) + node2.query("DROP TABLE IF EXISTS {} SYNC".format(name)) except: pass diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 13fea0455d0..a3e7d6e4b8b 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -55,7 +55,7 @@ def started_cluster(): def drop_table(nodes, table_name): for node in nodes: - node.query("DROP TABLE IF EXISTS {} NO DELAY".format(table_name)) + node.query("DROP TABLE IF EXISTS {} SYNC".format(table_name)) # Column TTL works only with wide parts, because it's very expensive to apply it for compact parts diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference index cd9da983785..11b660b54a3 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -2,7 +2,7 @@ runtime messages 0.001 runtime exceptions 0.05 messages shorter than 10 1 messages shorter than 16 3 -exceptions shorter than 30 30 +exceptions shorter than 30 3 noisy messages 0.3 noisy Trace messages 0.16 noisy Debug messages 0.09 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 480effec065..7796785afb5 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ 
b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -49,7 +49,14 @@ create temporary table known_short_messages (s String) as select * from (select 'Column ''{}'' already exists', 'No macro {} in config', 'Invalid origin H3 index: {}', 'Invalid session timeout: ''{}''', 'Tuple cannot be empty', 'Database name is empty', 'Table {} is not a Dictionary', 'Expected function, got: {}', 'Unknown identifier: ''{}''', -'Failed to {} input ''{}''', '{}.{} is not a VIEW', 'Cannot convert NULL to {}', 'Dictionary {} doesn''t exist' +'Failed to {} input ''{}''', '{}.{} is not a VIEW', 'Cannot convert NULL to {}', 'Dictionary {} doesn''t exist', +'Write file: {}', 'Unable to parse JSONPath', 'Host is empty in S3 URI.', 'Expected end of line', +'inflate failed: {}{}', 'Center is not valid', 'Column ''{}'' is ambiguous', 'Cannot parse object', 'Invalid date: {}', +'There is no cache by name: {}', 'No part {} in table', '`{}` should be a String', 'There are duplicate id {}', +'Invalid replica name: {}', 'Unexpected value {} in enum', 'Unknown BSON type: {}', 'Point is not valid', +'Invalid qualified name: {}', 'INTO OUTFILE is not allowed', 'Arguments must not be NaN', 'Cell is not valid', +'brotli decode error{}', 'Invalid H3 index: {}', 'Too large node state size', 'No additional keys found.', +'Attempt to read after EOF.', 'Replication was stopped', '{} building file infos', 'Cannot parse uuid {}' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. @@ -59,7 +66,7 @@ select 'messages shorter than 10', max2(countDistinctOrDefault(message_format_st select 'messages shorter than 16', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages; -- Same as above, but exceptions must be more informative. Feel free to update the threshold or remove this query if really necessary -select 'exceptions shorter than 30', max2(countDistinctOrDefault(message_format_string), 30) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%' and message_format_string not in known_short_messages; +select 'exceptions shorter than 30', max2(countDistinctOrDefault(message_format_string), 3) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%' and message_format_string not in known_short_messages; -- Avoid too noisy messages: top 1 message frequency must be less than 30%. 
We should reduce the threshold @@ -98,7 +105,9 @@ select 'incorrect patterns', max2(countDistinct(message_format_string), 15) from where ((rand() % 8) = 0) and message not like (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') as s) and message not like (s || ' (skipped % similar messages)') - and message not like ('%Exception: '||s||'%') group by message_format_string + and message not like ('%Exception: '||s||'%') + and message not like ('%(skipped % similar messages)%') + group by message_format_string ) where any_message not like '%Poco::Exception%'; drop table logs; diff --git a/tests/queries/0_stateless/00155_long_merges.sh b/tests/queries/0_stateless/00155_long_merges.sh index 83d89c57cfa..9ed0f2c6de1 100755 --- a/tests/queries/0_stateless/00155_long_merges.sh +++ b/tests/queries/0_stateless/00155_long_merges.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/00189_time_zones_long.reference b/tests/queries/0_stateless/00189_time_zones_long.reference index 8717a662771..d41c925bbe5 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.reference +++ b/tests/queries/0_stateless/00189_time_zones_long.reference @@ -246,18 +246,18 @@ toUnixTimestamp 1426415400 1426415400 date_trunc -2019-01-01 -2020-01-01 -2020-01-01 -2019-10-01 -2020-01-01 -2020-01-01 -2019-12-01 -2020-01-01 -2020-01-01 -2019-12-30 -2019-12-30 -2019-12-30 +2019-01-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-10-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-12-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-12-30 00:00:00 +2019-12-30 00:00:00 +2019-12-30 00:00:00 2019-12-31 00:00:00 2020-01-01 00:00:00 2020-01-02 00:00:00 @@ -270,18 +270,18 @@ date_trunc 2019-12-31 20:11:22 2020-01-01 12:11:22 2020-01-02 05:11:22 -2019-01-01 -2020-01-01 -2020-01-01 -2019-10-01 -2020-01-01 -2020-01-01 -2019-12-01 -2020-01-01 -2020-01-01 -2019-12-30 -2019-12-30 -2019-12-30 +2019-01-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-10-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-12-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-12-30 00:00:00 +2019-12-30 00:00:00 +2019-12-30 00:00:00 2019-12-31 00:00:00 2020-01-01 00:00:00 2020-01-02 00:00:00 @@ -294,8 +294,8 @@ date_trunc 2019-12-31 20:11:22 2020-01-01 12:11:22 2020-01-02 05:11:22 -2020-01-01 -2020-01-01 -2020-01-01 -2019-12-30 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2020-01-01 00:00:00 +2019-12-30 00:00:00 2020-01-01 00:00:00 diff --git a/tests/queries/0_stateless/00322_disable_checksumming.sh b/tests/queries/0_stateless/00322_disable_checksumming.sh index c044a5c6650..e04ec076f80 100755 --- a/tests/queries/0_stateless/00322_disable_checksumming.sh +++ b/tests/queries/0_stateless/00322_disable_checksumming.sh @@ -4,10 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -# use big-endian version of binary data for s390x -if [[ $(uname -a | grep s390x) ]]; then -echo -ne '\xdb\x8a\xe9\x59\xf2\x32\x74\x50\x39\xc4\x22\xfb\xa7\x4a\xc6\x37''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&decompress=1" --data-binary @- -else echo -ne '\x50\x74\x32\xf2\x59\xe9\x8a\xdb\x37\xc6\x4a\xa7\xfb\x22\xc4\x39''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&decompress=1" --data-binary @- -fi echo -ne 'xxxxxxxxxxxxxxxx''\x82\x13\x00\x00\x00\x09\x00\x00\x00''\x90SELECT 1\n' | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&decompress=1&http_native_compression_disable_checksumming_on_decompress=1" --data-binary @- diff --git a/tests/queries/0_stateless/00700_decimal_aggregates.reference b/tests/queries/0_stateless/00700_decimal_aggregates.reference index acf41546f5c..79195312867 100644 --- a/tests/queries/0_stateless/00700_decimal_aggregates.reference +++ b/tests/queries/0_stateless/00700_decimal_aggregates.reference @@ -5,7 +5,7 @@ -1275 -424.99999983 -255 -1275 -424.99999983 -255 101 101 101 101 101 101 -101 -101 -101 -101 -101 -101 -(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) +(101,101,101) (101,101,101) (101,101,101) (101,101,101) (1,1,1,1,1,1) 5 5 5 10 10 10 -50 -50 -16.66666666 -16.66666666 -10 -10 diff --git a/tests/queries/0_stateless/00700_decimal_aggregates.sql b/tests/queries/0_stateless/00700_decimal_aggregates.sql index a1814fc866f..6ca37e06918 100644 --- a/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -24,7 +24,7 @@ SELECT (uniq(a), uniq(b), uniq(c)), (uniqCombined(a), uniqCombined(b), uniqCombined(c)), (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), - (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) + (102 - uniqHLL12(a) >= 0, 102 - uniqHLL12(b) >= 0, 102 - uniqHLL12(c) >= 0, uniqHLL12(a) - 99 >= 0, uniqHLL12(b) - 99 >= 0, uniqHLL12(c) - 99 >= 0) FROM (SELECT * FROM decimal ORDER BY a); SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5; diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index 19d9b006cd7..71acc11b971 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -59,7 +59,7 @@ timeout $TIMEOUT bash -c thread4 2> /dev/null & wait -echo "DROP TABLE concurrent_alter_column NO DELAY" | ${CLICKHOUSE_CLIENT} # NO DELAY has effect only for Atomic database +echo "DROP TABLE concurrent_alter_column SYNC" | ${CLICKHOUSE_CLIENT} # SYNC has effect only for Atomic database # Wait for alters and check for deadlocks (in case of deadlock this loop will not finish) while true; do diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 62de3a149a7..4f964f2478f 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -135,13 +135,13 @@ Code: 43 ------------------------------------------ SELECT date_trunc(\'year\', N, \'Asia/Istanbul\') Code: 43 -"Date","2019-01-01" -"Date","2019-01-01" +"DateTime('Asia/Istanbul')","2019-01-01 00:00:00" 
+"DateTime('Asia/Istanbul')","2019-01-01 00:00:00" ------------------------------------------ SELECT date_trunc(\'month\', N, \'Asia/Istanbul\') Code: 43 -"Date","2019-09-01" -"Date","2019-09-01" +"DateTime('Asia/Istanbul')","2019-09-01 00:00:00" +"DateTime('Asia/Istanbul')","2019-09-01 00:00:00" ------------------------------------------ SELECT date_trunc(\'day\', N, \'Asia/Istanbul\') "DateTime('Asia/Istanbul')","2019-09-16 00:00:00" diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index d49f63e143d..5b1c50262bf 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -13,8 +13,8 @@ $CLICKHOUSE_CLIENT -n -q " DROP TABLE IF EXISTS alter_table0; DROP TABLE IF EXISTS alter_table1; - CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50)); - CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50)); + CREATE TABLE alter_table0 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 100)); + CREATE TABLE alter_table1 (a UInt8, b Int16, c Float32, d String, e Array(UInt8), f Nullable(UUID), g Tuple(UInt8, UInt16)) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a PARTITION BY b % 10 SETTINGS old_parts_lifetime = 1, cleanup_delay_period = 1, cleanup_delay_period_random_add = 0, replicated_max_mutations_in_one_entry = $(($RANDOM / 50 + 200)); " function thread1() diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index bceda77c7f8..f4f38ad9c83 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -63,7 +63,6 @@ function thread6() done } - # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout export -f thread1; export -f thread2; diff --git a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh index 4c0afc4c439..8ef03be02b6 100755 --- a/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh +++ 
b/tests/queries/0_stateless/01035_concurrent_move_partition_from_table_zookeeper.sh @@ -70,8 +70,8 @@ timeout $TIMEOUT bash -c thread5 2> /dev/null & wait -echo "DROP TABLE src NO DELAY" | ${CLICKHOUSE_CLIENT} -echo "DROP TABLE dst NO DELAY" | ${CLICKHOUSE_CLIENT} +echo "DROP TABLE src SYNC" | ${CLICKHOUSE_CLIENT} +echo "DROP TABLE dst SYNC" | ${CLICKHOUSE_CLIENT} sleep 5 # Check for deadlocks diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index f49fbc251fd..c52a6fefacb 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -6,29 +6,29 @@ DROP TABLE IF EXISTS mt; CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---With w_end---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WITH---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WHERE---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---ORDER_BY---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 45877cf0647..b37e4ed3095 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -6,29 +6,29 @@ DROP TABLE IF EXISTS mt; CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---With 
w_end---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WithOut w_end---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WITH---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WHERE---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---ORDER_BY---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; -DROP TABLE IF EXISTS wv NO DELAY; +DROP TABLE IF EXISTS wv SYNC; CREATE WINDOW VIEW wv ENGINE Memory AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index f9a465be2c8..9adff06442e 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -31,7 +31,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("DROP TABLE IF EXISTS db_01059_event_hop_watch_strict_asc.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS db_01059_event_hop_watch_strict_asc.wv NO DELAY") + client1.send("DROP TABLE IF EXISTS db_01059_event_hop_watch_strict_asc.wv SYNC") client1.expect(prompt) client1.send( @@ -71,7 +71,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE db_01059_event_hop_watch_strict_asc.wv NO DELAY") + client1.send("DROP TABLE db_01059_event_hop_watch_strict_asc.wv SYNC") client1.expect(prompt) client1.send("DROP TABLE db_01059_event_hop_watch_strict_asc.mt") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 92298fa7ad6..bb40b1df2f0 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -33,9 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.mt") client1.expect(prompt) - client1.send( - "DROP TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.wv NO DELAY" - ) + client1.send("DROP 
TABLE IF EXISTS 01062_window_view_event_hop_watch_asc.wv SYNC") client1.expect(prompt) client1.send( @@ -77,7 +75,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE 01062_window_view_event_hop_watch_asc.wv NO DELAY") + client1.send("DROP TABLE 01062_window_view_event_hop_watch_asc.wv SYNC") client1.expect(prompt) client1.send("DROP TABLE 01062_window_view_event_hop_watch_asc.mt") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01066_bit_count.reference b/tests/queries/0_stateless/01066_bit_count.reference index 4a3b084b4a2..9a1a9a69216 100644 --- a/tests/queries/0_stateless/01066_bit_count.reference +++ b/tests/queries/0_stateless/01066_bit_count.reference @@ -19,3 +19,7 @@ 1 10 000000000000F03F -1 11 000000000000F0BF inf 11 000000000000F07F +Hello, world!!!! 55 +67 +67 +1 diff --git a/tests/queries/0_stateless/01066_bit_count.sql b/tests/queries/0_stateless/01066_bit_count.sql index d50b2657542..0b1b2dc8247 100644 --- a/tests/queries/0_stateless/01066_bit_count.sql +++ b/tests/queries/0_stateless/01066_bit_count.sql @@ -11,3 +11,9 @@ SELECT bitCount(toInt16(-1)); SELECT bitCount(toInt8(-1)); SELECT x, bitCount(x), hex(reinterpretAsString(x)) FROM VALUES ('x Float64', (1), (-1), (inf)); + +SELECT toFixedString('Hello, world!!!!', 16) AS x, bitCount(x); + +SELECT length(replaceAll(bin('clickhouse cloud'), '0', '')); +SELECT bitCount('clickhouse cloud'); +SELECT length(replaceAll(bin('clickhouse cloud'), '0', '')) = bitCount('clickhouse cloud'); diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index ff15f14cbc3..eb31b2ccbcf 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -33,7 +33,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01069_window_view_proc_tumble_watch.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01069_window_view_proc_tumble_watch.wv NO DELAY") + client1.send("DROP TABLE IF EXISTS 01069_window_view_proc_tumble_watch.wv SYNC") client1.expect(prompt) client1.send( @@ -67,7 +67,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE 01069_window_view_proc_tumble_watch.wv NO DELAY") + client1.send("DROP TABLE 01069_window_view_proc_tumble_watch.wv SYNC") client1.expect(prompt) client1.send("DROP TABLE 01069_window_view_proc_tumble_watch.mt") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index bf9f437b8ad..8aeff041cc1 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -31,9 +31,9 @@ with client(name="client1>", log=log) as client1, client( client1.send("CREATE DATABASE IF NOT EXISTS 01070_window_view_watch_events") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01070_window_view_watch_events.mt NO DELAY") + client1.send("DROP TABLE IF EXISTS 01070_window_view_watch_events.mt SYNC") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01070_window_view_watch_events.wv NO DELAY") + client1.send("DROP TABLE IF EXISTS 
01070_window_view_watch_events.wv SYNC") client1.expect(prompt) client1.send( @@ -65,7 +65,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client1.send(client1.command) client1.expect(prompt) - client1.send("DROP TABLE 01070_window_view_watch_events.wv NO DELAY;") + client1.send("DROP TABLE 01070_window_view_watch_events.wv SYNC;") client1.expect(prompt) client1.send("DROP TABLE 01070_window_view_watch_events.mt;") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index 62a2a4bb4f3..c32e508c5a5 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -35,9 +35,9 @@ with client(name="client1>", log=log) as client1, client( client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.mt NO DELAY") + client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.mt SYNC") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.wv NO DELAY") + client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.wv SYNC") client1.expect(prompt) client1.send( @@ -89,7 +89,7 @@ with client(name="client1>", log=log) as client1, client( if match.groups()[1]: client3.send(client3.command) client3.expect(prompt) - client3.send("DROP TABLE 01078_window_view_alter_query_watch.wv NO DELAY;") + client3.send("DROP TABLE 01078_window_view_alter_query_watch.wv SYNC;") client3.expect(prompt) client3.send("DROP TABLE 01078_window_view_alter_query_watch.mt;") client3.expect(prompt) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 7c2e855ef72..12c8d295591 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -32,7 +32,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect(prompt) client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.mt") client1.expect(prompt) - client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.wv NO DELAY") + client1.send("DROP TABLE IF EXISTS 01082_window_view_watch_limit.wv SYNC") client1.expect(prompt) client1.send( @@ -61,7 +61,7 @@ with client(name="client1>", log=log) as client1, client( client1.expect("1 row" + end_of_block) client1.expect(prompt) - client1.send("DROP TABLE 01082_window_view_watch_limit.wv NO DELAY") + client1.send("DROP TABLE 01082_window_view_watch_limit.wv SYNC") client1.expect(prompt) client1.send("DROP TABLE 01082_window_view_watch_limit.mt") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01085_window_view_attach.sql b/tests/queries/0_stateless/01085_window_view_attach.sql index 0e4f24dc465..051557a6a76 100644 --- a/tests/queries/0_stateless/01085_window_view_attach.sql +++ b/tests/queries/0_stateless/01085_window_view_attach.sql @@ -15,7 +15,7 @@ CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT cou SHOW tables FROM test_01085; -DROP TABLE test_01085.wv NO DELAY; +DROP TABLE test_01085.wv SYNC; SHOW tables FROM test_01085; CREATE WINDOW VIEW test_01085.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM test_01085.mt GROUP BY tumble(timestamp, 
INTERVAL '5' SECOND) AS wid, market; @@ -26,5 +26,5 @@ SHOW tables FROM test_01085; ATTACH TABLE test_01085.wv; SHOW tables FROM test_01085; -DROP TABLE test_01085.wv NO DELAY; +DROP TABLE test_01085.wv SYNC; SHOW tables FROM test_01085; diff --git a/tests/queries/0_stateless/01086_window_view_cleanup.sh b/tests/queries/0_stateless/01086_window_view_cleanup.sh index a25cacd45ec..b078b4718c0 100755 --- a/tests/queries/0_stateless/01086_window_view_cleanup.sh +++ b/tests/queries/0_stateless/01086_window_view_cleanup.sh @@ -40,7 +40,7 @@ while true; do done $CLICKHOUSE_CLIENT "${opts[@]}" --query="SELECT market, wid FROM test_01086.\`.inner.wv\` ORDER BY market, \`windowID(timestamp, toIntervalSecond('5'), 'US/Samoa')\` as wid"; -$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.wv NO DELAY;" -$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.mt NO DELAY;" -$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.dst NO DELAY;" -$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP DATABASE test_01086 NO DELAY;" +$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.wv SYNC;" +$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.mt SYNC;" +$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP TABLE test_01086.dst SYNC;" +$CLICKHOUSE_CLIENT "${opts[@]}" --query="DROP DATABASE test_01086 SYNC;" diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference index 01efbb7c64b..6d32c20909b 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.reference @@ -3,15 +3,27 @@ 1 l \N Nullable(String) 2 \N Nullable(String) - +1 l Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +0 \N Nullable(String) \N Nullable(String) +1 l Nullable(String) \N Nullable(String) +- +1 l LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +0 LowCardinality(String) \N Nullable(String) +1 l LowCardinality(String) \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- 1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) - -1 l \N Nullable(String) -0 \N Nullable(String) -0 \N Nullable(String) -1 l \N Nullable(String) +0 \N - 0 - diff --git a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql index 38b72837174..2464b7a57cf 100644 --- a/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_join_lc_and_nullable_in_key.sql @@ -15,19 +15,37 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- lc should be supertype for l.lc and r.lc, so expect Nullable(String) +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, toTypeName(lc), r.lc, 
toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +-- old behavior is different +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, toTypeName(lc), r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; -SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc); +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc FROM t AS l RIGHT JOIN nr AS r USING (lc) SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference index c6bdcb773b2..bb29ec9becd 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.reference @@ -4,6 +4,16 @@ 2 \N Nullable(String) - 1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) +0 \N \N Nullable(String) +0 \N \N Nullable(String) +1 l \N Nullable(String) +- +1 l \N Nullable(String) 0 \N Nullable(String) 0 \N Nullable(String) 1 l \N Nullable(String) diff --git a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql index dbc2d7c9f5d..718e8358c64 100644 --- a/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql +++ b/tests/queries/0_stateless/01142_merge_join_lc_and_nullable_in_key.sql @@ -17,15 +17,27 @@ SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (x) ORD SELECT '-'; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT 
x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; SELECT '-'; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x; -SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 1; + +SELECT '-'; + +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, r.lc, toTypeName(r.lc) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; + +SELECT '-'; + +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l LEFT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l RIGHT JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; +SELECT x, lc, materialize(r.lc) y, toTypeName(y) FROM t AS l FULL JOIN nr AS r USING (lc) ORDER BY x SETTINGS allow_experimental_analyzer = 0; SELECT '-'; diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.reference b/tests/queries/0_stateless/01158_zookeeper_log_long.reference index a0088610c9d..7ec52cb3366 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.reference +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.reference @@ -18,22 +18,18 @@ Response 0 Create /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 4 Request 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 ZOK \N \N 0 0 96 0 blocks -Request 0 Multi 0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZOK \N \N /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 0 0 -Response 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 -Request 0 Multi 
0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 ZOK \N \N 0 0 0 0 +Response 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZOK \N \N 0 0 0 0 +Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 Request 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 0 ZOK \N \N 0 0 9 0 duration_ms diff --git a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql index fbb67a268ae..6edaaa5c602 100644 --- a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql +++ b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql @@ -1,8 +1,8 @@ -- Tags: no-replicated-database -- Tag no-replicated-database: ON CLUSTER is not allowed -DROP TABLE IF EXISTS test_repl ON CLUSTER test_shard_localhost SYNC; +DROP TABLE IF EXISTS test_repl ON CLUSTER test_shard_localhost NO DELAY; CREATE TABLE test_repl ON CLUSTER test_shard_localhost (n UInt64) ENGINE ReplicatedMergeTree('/clickhouse/test_01181/{database}/test_repl','r1') ORDER BY tuple(); -DETACH TABLE test_repl ON CLUSTER test_shard_localhost SYNC; +DETACH TABLE test_repl ON CLUSTER test_shard_localhost NO DELAY; ATTACH TABLE test_repl ON CLUSTER test_shard_localhost; -DROP TABLE test_repl ON CLUSTER test_shard_localhost SYNC; +DROP TABLE test_repl ON CLUSTER test_shard_localhost NO DELAY; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 553fee1f435..41c6a7bd709 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference 
@@ -138,6 +138,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL +displaySecretsInShowAndSelect [] GLOBAL ALL addressToLine [] GLOBAL INTROSPECTION addressToLineWithInlines [] GLOBAL INTROSPECTION addressToSymbol [] GLOBAL INTROSPECTION diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index f723412c636..eb89a5ed38c 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -13,6 +13,8 @@ CREATE USER u4_01292 IDENTIFIED WITH sha256_password CREATE USER u5_01292 IDENTIFIED WITH sha256_password CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password CREATE USER u7_01292 IDENTIFIED WITH double_sha1_password +CREATE USER u8_01292 IDENTIFIED WITH bcrypt_password +CREATE USER u9_01292 IDENTIFIED WITH bcrypt_password CREATE USER u1_01292 IDENTIFIED WITH sha256_password CREATE USER u2_01292 IDENTIFIED WITH sha256_password CREATE USER u3_01292 IDENTIFIED WITH sha256_password diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index d0f157d36b0..a283ce687e6 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel DROP USER IF EXISTS u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; DROP USER IF EXISTS u10_01292, u11_01292, u12_01292, u13_01292, u14_01292, u15_01292, u16_01292; @@ -31,6 +31,8 @@ CREATE USER u4_01292 IDENTIFIED WITH sha256_password BY 'qwe123'; CREATE USER u5_01292 IDENTIFIED WITH sha256_hash BY '18138372FAD4B94533CD4881F03DC6C69296DD897234E0CEE83F727E2E6B1F63'; CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password BY 'qwe123'; CREATE USER u7_01292 IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E'; +CREATE USER u8_01292 IDENTIFIED WITH bcrypt_password BY 'qwe123'; +CREATE USER u9_01292 IDENTIFIED WITH bcrypt_hash BY '$2a$12$rz5iy2LhuwBezsM88ZzWiemOVUeJ94xHTzwAlLMDhTzwUxOHaY64q'; SHOW CREATE USER u1_01292; SHOW CREATE USER u2_01292; SHOW CREATE USER u3_01292; @@ -38,6 +40,8 @@ SHOW CREATE USER u4_01292; SHOW CREATE USER u5_01292; SHOW CREATE USER u6_01292; SHOW CREATE USER u7_01292; +SHOW CREATE USER u8_01292; +SHOW CREATE USER u9_01292; ALTER USER u1_01292 IDENTIFIED BY '123qwe'; ALTER USER u2_01292 IDENTIFIED BY '123qwe'; ALTER USER u3_01292 IDENTIFIED BY '123qwe'; @@ -48,7 +52,7 @@ SHOW CREATE USER u2_01292; SHOW CREATE USER u3_01292; SHOW CREATE USER u4_01292; SHOW CREATE USER u5_01292; -DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; SELECT '-- host'; CREATE USER u1_01292 HOST ANY; diff --git a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference index 60c6076aef0..a905ea97ae5 100644 --- a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference +++ b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.reference @@ -1,24 +1,24 @@ hello test hello test -1_0_0_0 hello 1 -1_0_0_0 hello 1 
+0 0 hello 1 +0 0 hello 1 hello test goodbye test hello test goodbye test -3_0_0_1 goodbye 3 -1_0_0_1 hello 1 -3_0_0_1 goodbye 3 -1_0_0_1 hello 1 +0 0 goodbye 3 +0 0 hello 1 +0 0 goodbye 3 +0 0 hello 1 1 test 3 test 111 abc 1 test 3 test 111 abc -1_0_0_2 1 1 -111_0_0_1 111 111 -3_0_0_2 3 3 -1_0_0_2 1 1 -111_0_0_1 111 111 -3_0_0_2 3 3 +0 0 1 1 +0 0 111 111 +0 0 3 3 +0 0 1 1 +0 0 111 111 +0 0 3 3 diff --git a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql index f20156fd9e3..d40bcc15e55 100644 --- a/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql +++ b/tests/queries/0_stateless/01346_alter_enum_partition_key_replicated_zookeeper_long.sql @@ -13,16 +13,17 @@ INSERT INTO test VALUES ('hello', 'test'); SELECT * FROM test; SYSTEM SYNC REPLICA test2; SELECT * FROM test2; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2, 'goodbye' = 3); INSERT INTO test VALUES ('goodbye', 'test'); OPTIMIZE TABLE test FINAL; SELECT * FROM test ORDER BY x; +SYSTEM SYNC REPLICA test2; SELECT * FROM test2 ORDER BY x; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2); -- { serverError 524 } ALTER TABLE test MODIFY COLUMN x Enum('hello' = 1, 'world' = 2, 'test' = 3); @@ -33,9 +34,10 @@ ALTER TABLE test MODIFY COLUMN x Int8; INSERT INTO test VALUES (111, 'abc'); OPTIMIZE TABLE test FINAL; SELECT * FROM test ORDER BY x; +SYSTEM SYNC REPLICA test2; SELECT * FROM test2 ORDER BY x; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; -SELECT name, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test' AND active ORDER BY partition; +SELECT min_block_number, max_block_number, partition, partition_id FROM system.parts WHERE database = currentDatabase() AND table = 'test2' AND active ORDER BY partition; ALTER TABLE test MODIFY COLUMN x Enum8('' = 1); -- { serverError 524 } ALTER TABLE test MODIFY COLUMN x 
Enum16('' = 1); -- { serverError 524 } diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index b3f9fbb42dd..c080dded1c8 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-debug set -e diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.reference b/tests/queries/0_stateless/01476_right_full_join_switch.reference index 1f839b86013..54f9909762f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.reference +++ b/tests/queries/0_stateless/01476_right_full_join_switch.reference @@ -3,6 +3,16 @@ 1 l \N LowCardinality(String) Nullable(String) 2 \N LowCardinality(String) Nullable(String) - +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) diff --git a/tests/queries/0_stateless/01476_right_full_join_switch.sql b/tests/queries/0_stateless/01476_right_full_join_switch.sql index 5d041843ee2..dfbdec47e1f 100644 --- a/tests/queries/0_stateless/01476_right_full_join_switch.sql +++ b/tests/queries/0_stateless/01476_right_full_join_switch.sql @@ -10,8 +10,27 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); INSERT INTO nr VALUES (2, NULL); + SET join_use_nulls = 0; +SET allow_experimental_analyzer = 1; + +-- t.x is the supertype for `x` from left and right since `x` is inside `USING`. +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (x) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (x) ORDER BY t.x; + +SELECT '-'; + +SET allow_experimental_analyzer = 0; + +-- t.x is the supertype for `x` from left and right since `x` is inside `USING`. 
SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr AS r USING (x) ORDER BY t.x; diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference index ac4d0a3d21a..9b6890c01ee 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.reference @@ -17,7 +17,7 @@ 1 \N l Nullable(String) LowCardinality(String) 0 \N Nullable(String) LowCardinality(String) 1 \N l Nullable(String) LowCardinality(String) -- +- join_use_nulls - 1 l \N LowCardinality(String) Nullable(String) 2 \N \N LowCardinality(Nullable(String)) Nullable(String) 1 l \N LowCardinality(Nullable(String)) Nullable(String) @@ -33,3 +33,47 @@ 1 l \N LowCardinality(Nullable(String)) Nullable(String) \N \N \N LowCardinality(Nullable(String)) Nullable(String) - +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- analyzer - +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +1 l \N LowCardinality(String) Nullable(String) +2 \N LowCardinality(String) Nullable(String) +- +\N \N Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(String) +\N \N Nullable(String) LowCardinality(String) +- +1 l \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +- +0 \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +0 \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +- join_use_nulls - +1 l \N LowCardinality(String) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +1 l \N LowCardinality(Nullable(String)) Nullable(String) +2 \N \N LowCardinality(Nullable(String)) Nullable(String) +- +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +1 \N l Nullable(String) LowCardinality(String) +1 \N l Nullable(String) LowCardinality(Nullable(String)) +\N \N \N Nullable(String) LowCardinality(Nullable(String)) +- +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +1 l \N Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) +- +\N \N \N Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +1 \N l Nullable(String) Nullable(String) +\N \N \N Nullable(String) Nullable(String) diff --git a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 similarity index 83% rename from tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql rename to tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 index 2507613f051..6eafd41b411 100644 --- a/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql +++ b/tests/queries/0_stateless/01477_lc_in_merge_join_left_key.sql.j2 @@ -10,6 +10,14 @@ CREATE TABLE nr (`x` Nullable(UInt32), `s` Nullable(String)) ENGINE = Memory; INSERT INTO t VALUES (1, 'l'); 
INSERT INTO nr VALUES (2, NULL); +{% for allow_experimental_analyzer in [0, 1] -%} + +SET allow_experimental_analyzer = {{ allow_experimental_analyzer }}; + +{% if allow_experimental_analyzer -%} +SELECT '- analyzer -'; +{% endif -%} + SET join_use_nulls = 0; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; @@ -36,7 +44,7 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t SET join_use_nulls = 1; -SELECT '-'; +SELECT '- join_use_nulls -'; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l LEFT JOIN nr AS r USING (x) ORDER BY t.x; SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l RIGHT JOIN nr AS r USING (x) ORDER BY t.x; @@ -56,10 +64,11 @@ SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM t AS l FULL JOIN nr SELECT '-'; --- TODO --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; --- SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l LEFT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l RIGHT JOIN t AS r USING (s) ORDER BY t.x; +SELECT t.x, l.s, r.s, toTypeName(l.s), toTypeName(r.s) FROM nr AS l FULL JOIN t AS r USING (s) ORDER BY t.x; + +{% endfor %} DROP TABLE t; DROP TABLE nr; diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index f217b6094b2..4b907d5ebb6 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -4,8 +4,8 @@ SET insert_keeper_fault_injection_probability=0; -- disable fault injection; part ids are non-deterministic in case of insert retries -DROP TABLE IF EXISTS execute_on_single_replica_r1 NO DELAY; -DROP TABLE IF EXISTS execute_on_single_replica_r2 NO DELAY; +DROP TABLE IF EXISTS execute_on_single_replica_r1 SYNC; +DROP TABLE IF EXISTS execute_on_single_replica_r2 SYNC; /* that test requires fixed zookeeper path, so we cannot use ReplicatedMergeTree({database}) */ CREATE TABLE execute_on_single_replica_r1 (x UInt64) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01532/execute_on_single_replica', 'r1') ORDER BY tuple() SETTINGS execute_merges_on_single_replica_time_threshold=10; @@ -130,5 +130,5 @@ GROUP BY part_name ORDER BY part_name FORMAT Vertical; -DROP TABLE execute_on_single_replica_r1 NO DELAY; -DROP TABLE execute_on_single_replica_r2 NO DELAY; +DROP TABLE execute_on_single_replica_r1 SYNC; +DROP TABLE execute_on_single_replica_r2 SYNC; diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index f47d0863e69..89ce84f6dbc 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -8,7 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 
'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM system.parts)" + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh index 05edf02f7ed..0b9afcf633e 100755 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: no-s3-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,7 +11,8 @@ set -o pipefail # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 'PartsInMemory'), sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) = - (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') FROM system.parts)" + (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') + FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition (especially in fasttest that runs tests in parallel). diff --git a/tests/queries/0_stateless/01686_rocksdb.sql b/tests/queries/0_stateless/01686_rocksdb.sql index ad6f56772b0..f3177ce140e 100644 --- a/tests/queries/0_stateless/01686_rocksdb.sql +++ b/tests/queries/0_stateless/01686_rocksdb.sql @@ -24,7 +24,7 @@ SELECT * FROM 01686_test WHERE key IN (123, 456, -123) ORDER BY key; SELECT '--'; SELECT * FROM 01686_test WHERE key = 'Hello'; -- { serverError 53 } -DETACH TABLE 01686_test NO DELAY; +DETACH TABLE 01686_test SYNC; ATTACH TABLE 01686_test; SELECT * FROM 01686_test WHERE key IN (99, 999, 9999, -123) ORDER BY key; diff --git a/tests/queries/0_stateless/01889_sql_json_functions.reference b/tests/queries/0_stateless/01889_sql_json_functions.reference index 5ac1ff501e5..cb8e19ea2a0 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.reference +++ b/tests/queries/0_stateless/01889_sql_json_functions.reference @@ -37,6 +37,20 @@ select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_v {"world":"!"} SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings function_json_value_return_type_allow_complex=true; ["world","world2"] +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +1 +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +1 +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +1 +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; --JSON_QUERY-- SELECT JSON_QUERY('{"hello":1}', '$'); @@ -61,6 +75,20 @@ SELECT JSON_QUERY('', 
'$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); [0, 1, 4, 0, -1, -4] +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +[1] +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +[1] +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +[1] +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; --JSON_EXISTS-- SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index f174d04933c..947b0171ec6 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -20,6 +20,15 @@ select JSON_VALUE('{"a":"\\u263a"}', '$.a'); select JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true; select JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true; SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello') settings function_json_value_return_type_allow_complex=true; +SELECT JSON_VALUE('{"1key":1}', '$.1key'); +SELECT JSON_VALUE('{"hello":1}', '$[hello]'); +SELECT JSON_VALUE('{"hello":1}', '$["hello"]'); +SELECT JSON_VALUE('{"hello":1}', '$[\'hello\']'); +SELECT JSON_VALUE('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_VALUE('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_VALUE('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_QUERY--'; SELECT JSON_QUERY('{"hello":1}', '$'); @@ -33,6 +42,15 @@ SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) SELECT JSON_QUERY('', '$.hello'); SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); +SELECT JSON_QUERY('{"1key":1}', '$.1key'); +SELECT JSON_QUERY('{"hello":1}', '$[hello]'); +SELECT JSON_QUERY('{"hello":1}', '$["hello"]'); +SELECT JSON_QUERY('{"hello":1}', '$[\'hello\']'); +SELECT JSON_QUERY('{"hello 1":1}', '$["hello 1"]'); +SELECT JSON_QUERY('{"1key":1}', '$..1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$1key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$key'); -- { serverError 36 } +SELECT JSON_QUERY('{"1key":1}', '$.[key]'); -- { serverError 36 } SELECT '--JSON_EXISTS--'; SELECT JSON_EXISTS('{"hello":1}', '$'); diff --git a/tests/queries/0_stateless/01891_partition_hash.reference b/tests/queries/0_stateless/01891_partition_hash.reference index 56d11075e50..c5814777dfe 100644 --- a/tests/queries/0_stateless/01891_partition_hash.reference +++ b/tests/queries/0_stateless/01891_partition_hash.reference @@ -1 +1,2 @@ 6ba51fa36c625adab5d58007c96e32bf +ebc1c2f37455caea601feeb840757dd3 diff --git a/tests/queries/0_stateless/01891_partition_hash.sql b/tests/queries/0_stateless/01891_partition_hash.sql index f56ed6a4ff4..894594dd465 100644 --- a/tests/queries/0_stateless/01891_partition_hash.sql +++ b/tests/queries/0_stateless/01891_partition_hash.sql @@ 
-1,7 +1,32 @@ -drop table if exists tab; -create table tab (i8 Int8, i16 Int16, i32 Int32, i64 Int64, i128 Int128, i256 Int256, u8 UInt8, u16 UInt16, u32 UInt32, u64 UInt64, u128 UInt128, u256 UInt256, id UUID, s String, fs FixedString(33), a Array(UInt8), t Tuple(UInt16, UInt32), d Date, dt DateTime('Asia/Istanbul'), dt64 DateTime64(3, 'Asia/Istanbul'), dec128 Decimal128(3), dec256 Decimal256(4), lc LowCardinality(String)) engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) order by tuple(); -insert into tab values (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +DROP TABLE IF EXISTS tab; +CREATE TABLE tab ( + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + i128 Int128, + i256 Int256, + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + u128 UInt128, + u256 UInt256, + id UUID, + s String, + fs FixedString(33), + a Array(UInt8), + t Tuple(UInt16, UInt32), + d Date, + dt DateTime('Asia/Istanbul'), + dt64 DateTime64(3, 'Asia/Istanbul'), + dec128 Decimal128(3), + dec256 Decimal256(4), + lc LowCardinality(String)) +engine = MergeTree PARTITION BY (i8, i16, i32, i64, i128, i256, u8, u16, u32, u64, u128, u256, id, s, fs, a, t, d, dt, dt64, dec128, dec256, lc) ORDER BY tuple(); +INSERT INTO tab VALUES (-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); +INSERT INTO tab VALUES (123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, 123, 12345, 1234567890, 1234567890000000000, 123456789000000000000000000000000000000, 123456789000000000000000000000000000000000000000000000000000000000000000000000, '61f0c404-5cb3-11e7-907b-a6006ad3dba0', 'a', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', [1, 2, 3], (-1, -2), '2020-01-01', '2020-01-01 01:01:01', '2020-01-01 01:01:01', '123.456', '78.9101', 'a'); -- Here we check that partition id did not change. -- Different result means Backward Incompatible Change. Old partitions will not be accepted by new server. 
-select partition_id from system.parts where table = 'tab' and database = currentDatabase(); -drop table if exists tab; +SELECT partition_id FROM system.parts WHERE table = 'tab' AND database = currentDatabase(); +DROP TABLE IF EXISTS tab; diff --git a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh index c674f21034c..3448f052f51 100755 --- a/tests/queries/0_stateless/02020_alter_table_modify_comment.sh +++ b/tests/queries/0_stateless/02020_alter_table_modify_comment.sh @@ -43,7 +43,7 @@ EOF get_table_comment_info echo detach table - $CLICKHOUSE_CLIENT --query="DETACH TABLE comment_test_table NO DELAY;" + $CLICKHOUSE_CLIENT --query="DETACH TABLE comment_test_table SYNC;" get_table_comment_info echo re-attach table diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index f2524cac115..9cdc8182d67 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -47,7 +47,10 @@ CREATE TABLE system.clusters `default_database` String, `errors_count` UInt32, `slowdowns_count` UInt32, - `estimated_recovery_time` UInt32 + `estimated_recovery_time` UInt32, + `database_shard_name` String, + `database_replica_name` String, + `is_active` Nullable(UInt8) ) ENGINE = SystemClusters COMMENT 'SYSTEM TABLE is built on the fly.' @@ -281,7 +284,12 @@ CREATE TABLE system.functions `alias_to` String, `create_query` String, `origin` Enum8('System' = 0, 'SQLUserDefined' = 1, 'ExecutableUserDefined' = 2), - `description` String + `description` String, + `syntax` String, + `arguments` String, + `returned_value` String, + `examples` String, + `categories` String ) ENGINE = SystemFunctions COMMENT 'SYSTEM TABLE is built on the fly.' 
@@ -289,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC 
DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'addressToLine' = 140, 'addressToLineWithInlines' = 141, 'addressToSymbol' = 142, 'demangle' = 143, 'INTROSPECTION' = 144, 'FILE' = 145, 'URL' = 146, 'REMOTE' = 147, 'MONGO' = 148, 'MEILISEARCH' = 149, 'MYSQL' = 150, 'POSTGRES' = 151, 'SQLITE' = 152, 'ODBC' = 153, 'JDBC' = 154, 'HDFS' = 155, 'S3' = 156, 'HIVE' = 157, 'SOURCES' = 158, 'CLUSTER' = 159, 'ALL' = 160, 'NONE' = 161), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD 
MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'MEILISEARCH' = 150, 'MYSQL' = 151, 'POSTGRES' = 152, 'SQLITE' = 153, 'ODBC' = 154, 'JDBC' = 155, 'HDFS' = 156, 'S3' = 157, 'HIVE' = 158, 'SOURCES' = 159, 'CLUSTER' = 160, 'ALL' = 161, 'NONE' = 162), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -356,6 +364,7 @@ CREATE TABLE system.merges `partition_id` String, `is_mutation` UInt8, `total_size_bytes_compressed` UInt64, + `total_size_bytes_uncompressed` UInt64, `total_size_marks` UInt64, `bytes_read_uncompressed` UInt64, `rows_read` UInt64, @@ -572,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 
76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'addressToLine' = 140, 'addressToLineWithInlines' = 141, 'addressToSymbol' = 142, 'demangle' = 143, 'INTROSPECTION' = 144, 'FILE' = 145, 'URL' = 146, 'REMOTE' = 147, 'MONGO' = 148, 'MEILISEARCH' = 149, 'MYSQL' = 150, 'POSTGRES' = 151, 'SQLITE' = 152, 'ODBC' = 153, 'JDBC' = 154, 'HDFS' = 155, 'S3' = 156, 'HIVE' = 157, 'SOURCES' = 158, 'CLUSTER' = 159, 'ALL' = 160, 'NONE' = 161), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' 
= 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'MEILISEARCH' = 150, 'MYSQL' = 151, 'POSTGRES' = 152, 'SQLITE' = 153, 'ODBC' = 154, 'JDBC' = 155, 'HDFS' = 156, 'S3' = 157, 'HIVE' = 158, 'SOURCES' = 159, 'CLUSTER' = 160, 'ALL' = 161, 'NONE' = 162), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR 
INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'addressToLine' = 140, 'addressToLineWithInlines' = 141, 'addressToSymbol' = 142, 'demangle' = 143, 'INTROSPECTION' = 144, 'FILE' = 145, 'URL' = 146, 'REMOTE' = 147, 'MONGO' = 148, 'MEILISEARCH' = 149, 'MYSQL' = 150, 'POSTGRES' = 151, 'SQLITE' = 152, 'ODBC' = 153, 'JDBC' = 154, 'HDFS' = 155, 'S3' = 156, 'HIVE' = 157, 'SOURCES' = 158, 'CLUSTER' = 159, 'ALL' = 160, 'NONE' = 161)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 
'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 
'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'MEILISEARCH' = 150, 'MYSQL' = 151, 'POSTGRES' = 152, 'SQLITE' = 153, 'ODBC' = 154, 'JDBC' = 155, 'HDFS' = 156, 'S3' = 157, 'HIVE' = 158, 'SOURCES' = 159, 'CLUSTER' = 160, 'ALL' = 161, 'NONE' = 162)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' @@ -850,6 +859,7 @@ CREATE TABLE system.replicas `is_session_expired` UInt8, `future_parts` UInt32, `parts_to_check` UInt32, + `zookeeper_name` String, `zookeeper_path` String, `replica_name` String, `replica_path` String, @@ -1117,7 +1127,7 @@ CREATE TABLE system.users `name` String, `id` UUID, `storage` String, - `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6), + `auth_type` Enum8('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7), `auth_params` String, `host_ip` Array(String), `host_names` Array(String), diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.reference b/tests/queries/0_stateless/02125_transform_decimal_bug.reference index 7f59d0ee7bf..d1bf333ec8e 100644 --- a/tests/queries/0_stateless/02125_transform_decimal_bug.reference +++ b/tests/queries/0_stateless/02125_transform_decimal_bug.reference @@ -1,3 +1,4 @@ +1 0 1 2 diff --git a/tests/queries/0_stateless/02125_transform_decimal_bug.sql b/tests/queries/0_stateless/02125_transform_decimal_bug.sql index 4ef471ea875..002f60076e9 100644 --- a/tests/queries/0_stateless/02125_transform_decimal_bug.sql +++ b/tests/queries/0_stateless/02125_transform_decimal_bug.sql @@ -1,4 +1,4 @@ -SELECT transform(1, [1], [toDecimal32(1, 2)]); -- { serverError 44 } +SELECT transform(1, [1], [toDecimal32(1, 2)]); SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)]) FROM system.numbers LIMIT 10; SELECT transform(toDecimal32(number, 2), [toDecimal32(3, 2)], [toDecimal32(30, 2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; SELECT transform(number, [3, 5, 11], [toDecimal32(30, 2), toDecimal32(50, 2), toDecimal32(70,2)], toDecimal32(1000, 2)) FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/02163_operators.sql b/tests/queries/0_stateless/02163_operators.sql index b2414bb197e..3f2d7d8bbb7 100644 --- a/tests/queries/0_stateless/02163_operators.sql +++ b/tests/queries/0_stateless/02163_operators.sql @@ -1,2 +1,2 @@ -WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, TIMESTAMP '2022-02-02 02:02:02' AS w, [] AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t +WITH 2 AS `b.c`, [4, 5] AS a, 6 AS u, 3 AS v, 2 AS d, TRUE AS e, 1 AS f, 0 AS g, 2 AS h, 'Hello' AS i, 'World' AS j, 'hi' AS w, NULL AS k, (1, 2) AS l, 2 AS m, 3 AS n, [] AS o, [1] AS p, 1 AS q, q AS r, 1 AS s, 1 AS t SELECT INTERVAL CASE CASE WHEN NOT -a[`b.c`] * u DIV v + d IS NOT NULL AND e OR f BETWEEN g AND h THEN i ELSE j END WHEN w THEN k END || [l, (m, n)] MINUTE IS NULL OR NOT o::Array(INT) = p <> q < r > s != t AS upyachka; diff --git a/tests/queries/0_stateless/02169_map_functions.reference 
b/tests/queries/0_stateless/02169_map_functions.reference index bec2eaec595..10746a70f06 100644 --- a/tests/queries/0_stateless/02169_map_functions.reference +++ b/tests/queries/0_stateless/02169_map_functions.reference @@ -40,6 +40,8 @@ {'key1':1111,'key2':2222,'key5':500,'key6':600} {'key1':1112,'key2':2224,'key5':500,'key6':600} {'key1':1113,'key2':2226,'key5':500,'key6':600} +{'key5':500,'key6':600} +{'key5':500,'key6':600} 1 1 1 diff --git a/tests/queries/0_stateless/02169_map_functions.sql b/tests/queries/0_stateless/02169_map_functions.sql index 27ceb252022..febaf2bd9d0 100644 --- a/tests/queries/0_stateless/02169_map_functions.sql +++ b/tests/queries/0_stateless/02169_map_functions.sql @@ -11,6 +11,8 @@ SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; - SELECT mapConcat(col, map('key5', 500), map('key6', 600)) FROM table_map ORDER BY id; SELECT mapConcat(col, materialize(map('key5', 500)), map('key6', 600)) FROM table_map ORDER BY id; +SELECT concat(map('key5', 500), map('key6', 600)); +SELECT map('key5', 500) || map('key6', 600); SELECT mapExists((k, v) -> k LIKE '%3', col) FROM table_map ORDER BY id; SELECT mapExists((k, v) -> k LIKE '%2' AND v < 1000, col) FROM table_map ORDER BY id; diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference index d895040ef59..2ee0f256949 100644 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.reference @@ -1,15 +1,15 @@ Using storage policy: s3_cache 1 0 1 0 1 0 -0 0 1 0 +0 Using storage policy: local_cache 1 0 1 0 1 0 -0 0 1 0 +0 Using storage policy: azure_cache 1 0 1 0 1 0 -0 0 1 0 +0 diff --git a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh index 96e51a58cc4..f071a570243 100755 --- a/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh +++ b/tests/queries/0_stateless/02226_filesystem_cache_profile_events.sh @@ -64,19 +64,6 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do set remote_filesystem_read_method='threadpool'; """ - clickhouse client --multiquery --multiline --query """ - SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; - - SET enable_filesystem_cache_on_write_operations = 1; - - TRUNCATE TABLE test_02226; - SELECT count() FROM test_02226; - - SYSTEM DROP FILESYSTEM CACHE; - - INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; - """ - query_id=$(clickhouse client --query "select queryID() from ($query) limit 1") clickhouse client --multiquery --multiline --query """ @@ -90,7 +77,20 @@ for STORAGE_POLICY in 's3_cache' 'local_cache' 'azure_cache'; do AND current_database = currentDatabase() ORDER BY query_start_time DESC LIMIT 1; - - DROP TABLE test_02226; """ + + clickhouse client --multiquery --multiline --query """ + SELECT * FROM test_02226 WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; + + SET enable_filesystem_cache_on_write_operations = 1; + + TRUNCATE TABLE test_02226; + SELECT count() FROM test_02226; + + SYSTEM DROP FILESYSTEM CACHE; + + INSERT INTO test_02226 SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + """ + + clickhouse client --query "DROP TABLE test_02226" done diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference 
b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference index d3be4855b36..cf2bf5fb521 100644 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.reference @@ -1,10 +1,60 @@ Using storage policy: s3_cache -0 79 80 -0 745 746 -0 745 746 -0 745 746 +0 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache Using storage policy: local_cache -0 79 80 -0 745 746 -0 745 746 -0 745 746 +0 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache +Expect cache +DOWNLOADED 0 79 80 +DOWNLOADED 0 745 746 +2 +Expect no cache diff --git a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh index a487f3ca739..c7dc9fbd961 100755 --- a/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh +++ b/tests/queries/0_stateless/02240_system_filesystem_cache_table.sh @@ -9,34 +9,69 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for STORAGE_POLICY in 's3_cache' 'local_cache'; do echo "Using storage policy: $STORAGE_POLICY" + ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES" ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.filesystem_cache" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" ${CLICKHOUSE_CLIENT} --query "SYSTEM STOP MERGES test_02240_storage_policy" ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy SELECT number, toString(number) FROM numbers(100)" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + 
${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test_02240_storage_policy_3" - ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false" + ${CLICKHOUSE_CLIENT} --query "CREATE TABLE test_02240_storage_policy_3 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='${STORAGE_POLICY}_3', min_bytes_for_wide_part = 1000000, compress_marks=false, compress_primary_key=false" ${CLICKHOUSE_CLIENT} --enable_filesystem_cache_on_write_operations=0 --query "INSERT INTO test_02240_storage_policy_3 SELECT number, toString(number) FROM numbers(100)" + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; + + echo 'Expect no cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" + + 
echo 'Expect cache' + ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP MARK CACHE" + ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" + ${CLICKHOUSE_CLIENT} --query "SELECT state, file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_begin, file_segment_range_end, size" + ${CLICKHOUSE_CLIENT} --query "SELECT uniqExact(key) FROM system.filesystem_cache"; ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_02240_storage_policy_3 FORMAT Null" - ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" - ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP FILESYSTEM CACHE" + echo 'Expect no cache' ${CLICKHOUSE_CLIENT} --query "SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache" done diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh index 333be806de0..a6fa0457078 100755 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sh +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sh @@ -50,7 +50,7 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 NO DELAY" + $CLICKHOUSE_CLIENT --query "DROP TABLE test_02286 SYNC" $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --query "SELECT cache_path FROM system.filesystem_cache" diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index c98e9d263ca..7561b32bae1 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ -2147483648 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 -2147483648 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 +134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 +134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 diff --git a/tests/queries/0_stateless/02344_show_caches.reference b/tests/queries/0_stateless/02344_show_caches.reference deleted file mode 100644 index 2ee4f902ba1..00000000000 --- a/tests/queries/0_stateless/02344_show_caches.reference +++ /dev/null @@ -1,14 +0,0 @@ -cached_azure -s3_cache_2 -s3_cache -s3_cache_3 -s3_cache_multi -s3_cache_4 -s3_cache_5 -s3_cache_small_segment_size -local_cache -s3_cache_6 -s3_cache_small -local_cache_2 -local_cache_3 -s3_cache_multi_2 diff --git a/tests/queries/0_stateless/02344_show_caches.sql b/tests/queries/0_stateless/02344_show_caches.sql deleted file mode 100644 index 56f00b89051..00000000000 --- a/tests/queries/0_stateless/02344_show_caches.sql +++ /dev/null @@ -1,2 +0,0 @@ --- Tags: no-fasttest, no-replicated-database, no-cpu-aarch64 -SHOW FILESYSTEM CACHES; diff --git a/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql b/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql index d61b680bb87..4f52740c498 100644 --- a/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql +++ 
b/tests/queries/0_stateless/02378_part_log_profile_events_replicated.sql @@ -1,8 +1,8 @@ -- Tags: long, replica, no-replicated-database, no-parallel -DROP TABLE IF EXISTS part_log_profile_events_r1 NO DELAY; -DROP TABLE IF EXISTS part_log_profile_events_r2 NO DELAY; +DROP TABLE IF EXISTS part_log_profile_events_r1 SYNC; +DROP TABLE IF EXISTS part_log_profile_events_r2 SYNC; CREATE TABLE part_log_profile_events_r1 (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_02378/part_log_profile_events', 'r1') @@ -36,5 +36,5 @@ WHERE event_time > now() - INTERVAL 10 MINUTE AND event_type == 'DownloadPart' ; -DROP TABLE part_log_profile_events_r1 NO DELAY; -DROP TABLE part_log_profile_events_r2 NO DELAY; +DROP TABLE part_log_profile_events_r1 SYNC; +DROP TABLE part_log_profile_events_r2 SYNC; diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 3688a649d5e..88fb2cdf9b1 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET send_logs_level = 'fatal'; + drop table if exists rmt; drop table if exists rmt2; diff --git a/tests/queries/0_stateless/02447_drop_database_replica.reference b/tests/queries/0_stateless/02447_drop_database_replica.reference index 1d65fe66c6e..f2b41569540 100644 --- a/tests/queries/0_stateless/02447_drop_database_replica.reference +++ b/tests/queries/0_stateless/02447_drop_database_replica.reference @@ -6,10 +6,16 @@ t 2 2 2 -rdb_default 1 1 -rdb_default 1 2 2 2 2 +2 +rdb_default 1 1 s1 r1 1 +2 +2 +rdb_default 1 1 s1 r1 1 +rdb_default 1 2 s1 r2 0 +2 +2 t -rdb_default_3 1 1 +rdb_default_4 1 1 s1 r1 1 diff --git a/tests/queries/0_stateless/02447_drop_database_replica.sh b/tests/queries/0_stateless/02447_drop_database_replica.sh index 4bfd6243c2e..47a6cf10bda 100755 --- a/tests/queries/0_stateless/02447_drop_database_replica.sh +++ b/tests/queries/0_stateless/02447_drop_database_replica.sh @@ -13,35 +13,49 @@ $CLICKHOUSE_CLIENT -q "show tables from $db" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from table t" 2>&1| grep -Fac "SYNTAX_ERROR" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from database $db" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" 2>&1| grep -Fac "There is a local database" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/'" 2>&1| grep -Fac "does not look like a path of Replicated database" $CLICKHOUSE_CLIENT -q "system drop database replica 's2|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1' from shard 'r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "does not exist" +$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 
2>&1| grep -Fac "does not exist" $CLICKHOUSE_CLIENT -q "system drop database replica 's2/r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb'" 2>&1| grep -Fac "Invalid replica name" db2="${db}_2" +db3="${db}_3" $CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db2 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r2')" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's2', 'r1')" $CLICKHOUSE_CLIENT -q "system sync database replica $db" -$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db' order by shard_num, replica_num" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' and shard_num=1 and replica_num=1" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from database $db2" 2>&1| grep -Fac "is active, cannot drop it" +$CLICKHOUSE_CLIENT -q "detach database $db3" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's2' from database $db" +$CLICKHOUSE_CLIENT -q "attach database $db3" 2>/dev/null +$CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db3.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE + $CLICKHOUSE_CLIENT -q "detach database $db2" +$CLICKHOUSE_CLIENT -q "system sync database replica $db" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db' order by shard_num, replica_num" $CLICKHOUSE_CLIENT -q "system drop database replica 's1|r2' from database $db" $CLICKHOUSE_CLIENT -q "attach database $db2" 2>/dev/null $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db2.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT -q "detach database $db" -$CLICKHOUSE_CLIENT -q "system drop database replica 's1|r1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" +$CLICKHOUSE_CLIENT -q "system drop database replica 'r1' from shard 's1' from zkpath '/test/$CLICKHOUSE_DATABASE/rdb/'" $CLICKHOUSE_CLIENT -q "attach database $db" 2>/dev/null $CLICKHOUSE_CLIENT --distributed_ddl_output_mode=none -q "create table $db.t2 as system.query_log" 2>&1| grep -Fac "Database is in readonly mode" # Suppress style check: current_database=$CLICKHOUSE_DATABASE $CLICKHOUSE_CLIENT -q "show tables from $db" -db3="${db}_3" -$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db3 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" -$CLICKHOUSE_CLIENT -q "system sync database replica $db3" -$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num from system.clusters where cluster='$db3'" +db4="${db}_4" +$CLICKHOUSE_CLIENT --allow_experimental_database_replicated=1 -q "create database $db4 engine=Replicated('/test/$CLICKHOUSE_DATABASE/rdb', 's1', 'r1')" +$CLICKHOUSE_CLIENT -q "system sync database replica $db4" +$CLICKHOUSE_CLIENT -q "select cluster, shard_num, replica_num, database_shard_name, database_replica_name, is_active from system.clusters where cluster='$db4'" $CLICKHOUSE_CLIENT -q "drop database $db" $CLICKHOUSE_CLIENT -q "drop database $db2" $CLICKHOUSE_CLIENT -q "drop database $db3" +$CLICKHOUSE_CLIENT -q "drop database $db4" diff 
--git a/tests/queries/0_stateless/02456_async_inserts_logs.reference b/tests/queries/0_stateless/02456_async_inserts_logs.reference index efd8a88eca4..ba1b19fb184 100644 --- a/tests/queries/0_stateless/02456_async_inserts_logs.reference +++ b/tests/queries/0_stateless/02456_async_inserts_logs.reference @@ -1,7 +1,10 @@ 5 - Values 21 1 Ok 1 -t_async_inserts_logs JSONEachRow 39 1 Ok 1 -t_async_inserts_logs Values 8 1 Ok 1 -t_async_inserts_logs JSONEachRow 6 0 ParsingError 1 -t_async_inserts_logs Values 6 0 ParsingError 1 -t_async_inserts_logs Values 8 0 FlushError 1 + Values 21 2 1 Ok 1 +t_async_inserts_logs JSONEachRow 39 2 1 Ok 1 +t_async_inserts_logs Values 8 1 1 Ok 1 +t_async_inserts_logs JSONEachRow 6 0 0 ParsingError 1 +t_async_inserts_logs Values 6 0 0 ParsingError 1 +t_async_inserts_logs Values 8 1 0 FlushError 1 +AsyncInsertBytes 1 +AsyncInsertQuery 1 +AsyncInsertRows 1 diff --git a/tests/queries/0_stateless/02456_async_inserts_logs.sh b/tests/queries/0_stateless/02456_async_inserts_logs.sh index 006455e2d42..43cd73d7231 100755 --- a/tests/queries/0_stateless/02456_async_inserts_logs.sh +++ b/tests/queries/0_stateless/02456_async_inserts_logs.sh @@ -30,10 +30,15 @@ ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_async_inserts_logs" ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -q " - SELECT table, format, bytes, empty(exception), status, + SELECT table, format, bytes, rows, empty(exception), status, status = 'ParsingError' ? flush_time_microseconds = 0 : flush_time_microseconds > event_time_microseconds AS time_ok FROM system.asynchronous_insert_log WHERE database = '$CLICKHOUSE_DATABASE' OR query ILIKE 'INSERT INTO FUNCTION%$CLICKHOUSE_DATABASE%' ORDER BY table, status, format" ${CLICKHOUSE_CLIENT} -q "DROP TABLE t_async_inserts_logs" + +${CLICKHOUSE_CLIENT} -q " +SELECT event, value > 0 FROM system.events +WHERE event IN ('AsyncInsertQuery', 'AsyncInsertBytes', 'AsyncInsertRows') +ORDER BY event" diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference new file mode 100644 index 00000000000..ed6ac232d9c --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference @@ -0,0 +1,2 @@ +a \N +1 1 \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql new file mode 100644 index 00000000000..2d56e315bd1 --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql @@ -0,0 +1,11 @@ +drop table if exists test_table; +create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; +insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); +select * from test_table where B is null; +drop table test_table; + +DROP TABLE IF EXISTS dm_metric_small2; +CREATE TABLE dm_metric_small2 (`x` Nullable(Int64), `y` Nullable(Int64), `z` Nullable(Int64)) ENGINE = MergeTree() ORDER BY (x, y, z) SETTINGS index_granularity = 1, allow_nullable_key = 1; +INSERT INTO dm_metric_small2 VALUES (1,1,NULL) (1,1,1) (1,2,0) (1,2,1) (1,2,NULL) (1,2,NULL); +SELECT * FROM dm_metric_small2 WHERE (x = 1) AND (y = 1) AND z IS NULL; +DROP TABLE dm_metric_small2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference 
b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference deleted file mode 100644 index f0227e1a41e..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference +++ /dev/null @@ -1 +0,0 @@ -a \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql deleted file mode 100644 index ad0c09222c2..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql +++ /dev/null @@ -1,9 +0,0 @@ -drop table if exists test_table; - -create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; - -insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); - -select * from test_table where B is null; - -drop table test_table; diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 1be2b673b73..9fd82da1038 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -15,7 +15,7 @@ from pure_http_client import ClickHouseClient client = ClickHouseClient() # test table without partition -client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") client.query( """ CREATE TABLE t_async_insert_dedup_no_part ( @@ -35,7 +35,7 @@ client.query( ) result = client.query("select count(*) from t_async_insert_dedup_no_part") print(result, flush=True) -client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part SYNC") # generate data and push to queue @@ -95,7 +95,7 @@ def fetch_and_insert_data(q, client): # main process -client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") client.query( """ CREATE TABLE t_async_insert_dedup ( @@ -161,6 +161,6 @@ result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}") -client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") +client.query("DROP TABLE IF EXISTS t_async_insert_dedup SYNC") os._exit(os.EX_OK) diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference index f5284f38b86..a1a653361ee 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -405,16 +405,6 @@ QUERY id: 0 TABLE id: 7, table_name: system.numbers LIMIT CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt64 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N SELECT transform(number, [NULL], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) FROM ( @@ -424,56 +414,38 @@ FROM ) QUERY id: 0 PROJECTION COLUMNS - transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String PROJECTION LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: transform, 
function_type: ordinary, result_type: Nullable(Nothing) + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String ARGUMENTS - LIST id: 3, nodes: 4 - COLUMN id: 4, column_name: number, result_type: Nullable(Nothing), source_id: 5 - CONSTANT id: 6, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) - CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) - CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: Nullable(Nothing), source_id: 7 + CONSTANT id: 8, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String JOIN TREE - QUERY id: 5, is_subquery: 1 + QUERY id: 7, is_subquery: 1 PROJECTION COLUMNS number Nullable(Nothing) PROJECTION - LIST id: 9, nodes: 1 - CONSTANT id: 10, constant_value: NULL, constant_value_type: Nullable(Nothing) + LIST id: 17, nodes: 1 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) JOIN TREE - TABLE id: 11, table_name: system.numbers + TABLE id: 19, table_name: system.numbers LIMIT - CONSTANT id: 12, constant_value: UInt64_10, constant_value_type: UInt64 -\N -\N -\N -\N -\N -\N -\N -\N -\N -\N -SELECT transform(number, NULL, _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) -FROM system.numbers -LIMIT 10 -QUERY id: 0 - PROJECTION COLUMNS - transform(number, NULL, [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) - PROJECTION - LIST id: 1, nodes: 1 - FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) - ARGUMENTS - LIST id: 3, nodes: 4 - COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 - CONSTANT id: 6, constant_value: NULL, constant_value_type: Nullable(Nothing) - CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) - CONSTANT id: 8, constant_value: \'other\', constant_value_type: String - JOIN TREE - TABLE id: 5, table_name: system.numbers - LIMIT - CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt64 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt64 other other 
google diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql index c23046c7b20..492d42cb6bc 100644 --- a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -33,13 +33,13 @@ SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; -SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); -- { serverError 36 } EXPLAIN SYNTAX SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); -SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } +EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -- { serverError 43 } SET optimize_if_transform_strings_to_enum = 0; diff --git a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh index 918adc12de6..ed66c36b823 100755 --- a/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh +++ b/tests/queries/0_stateless/02503_cache_on_write_with_small_segment_size.sh @@ -33,5 +33,5 @@ select count() from system.filesystem_cache_log where query_id = '$query_id' AND ${CLICKHOUSE_CLIENT} --multiline --multiquery -q " select count() from ttt; -drop table ttt no delay; +drop table ttt sync; " diff --git a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh index 9e22089d5e1..458a5e95faa 100755 --- a/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh +++ b/tests/queries/0_stateless/02515_cleanup_async_insert_block_ids.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}" $CLICKHOUSE_CLIENT -n --query " - DROP TABLE IF EXISTS t_async_insert_cleanup NO DELAY; + DROP TABLE IF EXISTS t_async_insert_cleanup SYNC; CREATE TABLE t_async_insert_cleanup ( KeyID UInt32 ) Engine = 
ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup', '{replica}') @@ -27,7 +27,7 @@ old_answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper W for i in {1..300}; do answer=$($CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'") if [ $answer == '10' ]; then - $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" + $CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup SYNC;" exit 0 fi sleep 1 @@ -36,4 +36,4 @@ done $CLICKHOUSE_CLIENT --query "SELECT count(*) FROM t_async_insert_cleanup" echo $old_answer $CLICKHOUSE_CLIENT --query "SELECT count(*) FROM system.zookeeper WHERE path like '/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/t_async_insert_cleanup/async_blocks%' settings allow_unrestricted_reads_from_keeper = 'true'" -$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup NO DELAY;" +$CLICKHOUSE_CLIENT -n --query "DROP TABLE t_async_insert_cleanup SYNC;" diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference index fd0b223f8e5..19da8828c30 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.reference @@ -2,6 +2,10 @@ 1 0 +1 +1 + +1 \N 100000000000000000000 diff --git a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql index b6e60aa2e1f..6b58d737a3e 100644 --- a/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql +++ b/tests/queries/0_stateless/02516_join_with_totals_and_subquery_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + SELECT * FROM ( @@ -12,7 +14,26 @@ INNER JOIN SELECT 1 GROUP BY 1 WITH TOTALS -) AS t2 USING (a); +) AS t2 USING (a) +SETTINGS allow_experimental_analyzer=0; + +SELECT * +FROM +( + SELECT 1 AS a +) AS t1 +INNER JOIN +( + SELECT 1 AS a + GROUP BY 1 + WITH TOTALS + UNION ALL + SELECT 1 + GROUP BY 1 + WITH TOTALS +) AS t2 USING (a) +SETTINGS allow_experimental_analyzer=1; + SELECT a FROM diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference index c7a02045316..5e50b9e6cbf 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -5,18 +5,33 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hello World 5 Hello World . x Hello World +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . 
x Hello World - const needle, non-const replacement 1 Hello World l xx Hexxxxo Worxxd 2 Hello World l x Hexxo Worxd 3 Hello World l x Hexxo Worxd 4 Hello World l x Hexxo Worxd 5 Hello World l x Hexxo Worxd +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd - non-const needle, non-const replacement 1 Hello World l xx Hexxxxo Worxxd 2 Hello World ll x Hexo World 3 Hello World not_found x Hello World 4 Hello World [eo] x Hello World 5 Hello World . x Hello World +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World ** replaceOne() ** - non-const needle, const replacement 1 Hello World l x Hexlo World @@ -24,18 +39,33 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hello World 5 Hello World . x Hello World +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World - const needle, non-const replacement 1 Hello World l xx Hexxlo World 2 Hello World l x Hexlo World 3 Hello World l x Hexlo World 4 Hello World l x Hexlo World 5 Hello World l x Hexlo World +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World - non-const needle, non-const replacement 1 Hello World l xx Hexxlo World 2 Hello World ll x Hexo World 3 Hello World not_found x Hello World 4 Hello World [eo] x Hello World 5 Hello World . x Hello World +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hello World +5 Hello World . x Hello World ** replaceRegexpAll() ** - non-const needle, const replacement 1 Hello World l x Hexxo Worxd @@ -43,18 +73,33 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllx Wxrld 5 Hello World . x xxxxxxxxxxx +1 Hello World l x Hexxo Worxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . x xxxxxxxxxxx - const needle, non-const replacement 1 Hello World l xx Hexxxxo Worxxd 2 Hello World l x Hexxo Worxd 3 Hello World l x Hexxo Worxd 4 Hello World l x Hexxo Worxd 5 Hello World l x Hexxo Worxd +1 Hello World l xx Hexxxxo Worxxd +2 Hello World l x Hexxo Worxd +3 Hello World l x Hexxo Worxd +4 Hello World l x Hexxo Worxd +5 Hello World l x Hexxo Worxd - non-const needle, non-const replacement 1 Hello World l xx Hexxxxo Worxxd 2 Hello World ll x Hexo World 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllx Wxrld 5 Hello World . x xxxxxxxxxxx +1 Hello World l xx Hexxxxo Worxxd +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllx Wxrld +5 Hello World . x xxxxxxxxxxx ** replaceRegexpOne() ** - non-const needle, const replacement 1 Hello World l x Hexlo World @@ -62,16 +107,31 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllo World 5 Hello World . x xello World +1 Hello World l x Hexlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . 
x xello World - const needle, non-const replacement 1 Hello World l xx Hexxlo World 2 Hello World l x Hexlo World 3 Hello World l x Hexlo World 4 Hello World l x Hexlo World 5 Hello World l x Hexlo World +1 Hello World l xx Hexxlo World +2 Hello World l x Hexlo World +3 Hello World l x Hexlo World +4 Hello World l x Hexlo World +5 Hello World l x Hexlo World - non-const needle, non-const replacement 1 Hello World l xx Hexxlo World 2 Hello World ll x Hexo World 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllo World 5 Hello World . x xello World +1 Hello World l xx Hexxlo World +2 Hello World ll x Hexo World +3 Hello World not_found x Hello World +4 Hello World [eo] x Hxllo World +5 Hello World . x xello World Check that an exception is thrown if the needle is empty diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql index 7406f0309bb..926bde3a74b 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -9,53 +9,63 @@ CREATE TABLE test_tab INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'xx') (2, 'Hello World', 'll', 'x') (3, 'Hello World', 'not_found', 'x') (4, 'Hello World', '[eo]', 'x') (5, 'Hello World', '.', 'x') + SELECT '** replaceAll() **'; SELECT '- non-const needle, const replacement'; SELECT id, haystack, needle, 'x', replaceAll(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceAll('Hello World', needle, 'x') FROM test_tab ORDER BY id; SELECT '- const needle, non-const replacement'; SELECT id, haystack, 'l', replacement, replaceAll(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, replaceAll('Hello World', 'l', replacement) FROM test_tab ORDER BY id; SELECT '- non-const needle, non-const replacement'; SELECT id, haystack, needle, replacement, replaceAll(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceAll('Hello World', needle, replacement) FROM test_tab ORDER BY id; + SELECT '** replaceOne() **'; SELECT '- non-const needle, const replacement'; SELECT id, haystack, needle, 'x', replaceOne(haystack, needle, 'x') FROM test_tab ORDER BY id; - +SELECT id, haystack, needle, 'x', replaceOne('Hello World', needle, 'x') FROM test_tab ORDER BY id; SELECT '- const needle, non-const replacement'; SELECT id, haystack, 'l', replacement, replaceOne(haystack, 'l', replacement) FROM test_tab ORDER BY id; - +SELECT id, haystack, 'l', replacement, replaceOne('Hello World', 'l', replacement) FROM test_tab ORDER BY id; SELECT '- non-const needle, non-const replacement'; SELECT id, haystack, needle, replacement, replaceOne(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceOne('Hello World', needle, replacement) FROM test_tab ORDER BY id; SELECT '** replaceRegexpAll() **'; SELECT '- non-const needle, const replacement'; SELECT id, haystack, needle, 'x', replaceRegexpAll(haystack, needle, 'x') FROM test_tab ORDER BY id; +SELECT id, haystack, needle, 'x', replaceRegexpAll('Hello World', needle, 'x') FROM test_tab ORDER BY id; SELECT '- const needle, non-const replacement'; SELECT id, haystack, 'l', replacement, replaceRegexpAll(haystack, 'l', replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, 'l', replacement, 
replaceRegexpAll('Hello World', 'l', replacement) FROM test_tab ORDER BY id; SELECT '- non-const needle, non-const replacement'; SELECT id, haystack, needle, replacement, replaceRegexpAll(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceRegexpAll('Hello World', needle, replacement) FROM test_tab ORDER BY id; SELECT '** replaceRegexpOne() **'; SELECT '- non-const needle, const replacement'; SELECT id, haystack, needle, 'x', replaceRegexpOne(haystack, needle, 'x') FROM test_tab ORDER BY id; - +SELECT id, haystack, needle, 'x', replaceRegexpOne('Hello World', needle, 'x') FROM test_tab ORDER BY id; SELECT '- const needle, non-const replacement'; SELECT id, haystack, 'l', replacement, replaceRegexpOne(haystack, 'l', replacement) FROM test_tab ORDER BY id; - +SELECT id, haystack, 'l', replacement, replaceRegexpOne('Hello World', 'l', replacement) FROM test_tab ORDER BY id; SELECT '- non-const needle, non-const replacement'; SELECT id, haystack, needle, replacement, replaceRegexpOne(haystack, needle, replacement) FROM test_tab ORDER BY id; +SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle, replacement) FROM test_tab ORDER BY id; DROP TABLE IF EXISTS test_tab; diff --git a/tests/queries/0_stateless/02542_case_no_else.reference b/tests/queries/0_stateless/02542_case_no_else.reference new file mode 100644 index 00000000000..8f3fdf29168 --- /dev/null +++ b/tests/queries/0_stateless/02542_case_no_else.reference @@ -0,0 +1,3 @@ +2 +1 Z +1 Z diff --git a/tests/queries/0_stateless/02542_case_no_else.sql b/tests/queries/0_stateless/02542_case_no_else.sql new file mode 100644 index 00000000000..0c7975a750e --- /dev/null +++ b/tests/queries/0_stateless/02542_case_no_else.sql @@ -0,0 +1,14 @@ +SELECT CASE 1 WHEN 1 THEN 2 END; + +SELECT id, + CASE id + WHEN 1 THEN 'Z' + END x +FROM (SELECT 1 as id); + +SELECT id, + CASE id + WHEN 1 THEN 'Z' + ELSE 'X' + END x +FROM (SELECT 1 as id); diff --git a/tests/queries/0_stateless/02542_transform_new.reference b/tests/queries/0_stateless/02542_transform_new.reference new file mode 100644 index 00000000000..b6eaa692c41 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_new.reference @@ -0,0 +1,32 @@ +1 +1 +1 +1 +9 +9 +\N +7 +1 +9 +7 +b +b +b +b +a +a +\N +c +sep1 +80000 +80000 +sep2 +80000 +80000 +sep3 +1 +sep4 +8000 +sep5 +8000 +sep6 diff --git a/tests/queries/0_stateless/02542_transform_new.sql b/tests/queries/0_stateless/02542_transform_new.sql new file mode 100644 index 00000000000..43da0a50731 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_new.sql @@ -0,0 +1,35 @@ +select transform(2, [1,2], [9,1], materialize(null)); +select transform(2, [1,2], [9,1], materialize(7)); +select transform(2, [1,2], [9,1], null); +select transform(2, [1,2], [9,1], 7); +select transform(1, [1,2], [9,1], null); +select transform(1, [1,2], [9,1], 7); +select transform(5, [1,2], [9,1], null); +select transform(5, [1,2], [9,1], 7); +select transform(2, [1,2], [9,1]); +select transform(1, [1,2], [9,1]); +select transform(7, [1,2], [9,1]); + +select transform(2, [1,2], ['a','b'], materialize(null)); +select transform(2, [1,2], ['a','b'], materialize('c')); +select transform(2, [1,2], ['a','b'], null); +select transform(2, [1,2], ['a','b'], 'c'); +select transform(1, [1,2], ['a','b'], null); +select transform(1, [1,2], ['a','b'], 'c'); +select transform(5, [1,2], ['a','b'], null); +select transform(5, [1,2], ['a','b'], 'c'); + +select 'sep1'; +SELECT transform(number, [2], 
[toDecimal32(1, 1)], materialize(80000)) as x FROM numbers(2); +select 'sep2'; +SELECT transform(number, [2], [toDecimal32(1, 1)], 80000) as x FROM numbers(2); +select 'sep3'; +SELECT transform(toDecimal32(2, 1), [toDecimal32(2, 1)], [1]); +select 'sep4'; +SELECT transform(8000, [1], [toDecimal32(2, 1)]); +select 'sep5'; +SELECT transform(toDecimal32(8000,0), [1], [toDecimal32(2, 1)]); +select 'sep6'; +SELECT transform(-9223372036854775807, [-1], [toDecimal32(1024, 3)]) FROM system.numbers LIMIT 7; -- { serverError BAD_ARGUMENTS } +SELECT [NULL, NULL, NULL, NULL], transform(number, [2147483648], [toDecimal32(1, 2)]) AS x FROM numbers(257) WHERE materialize(10); -- { serverError BAD_ARGUMENTS } +SELECT transform(-2147483649, [1], [toDecimal32(1, 2)]) GROUP BY [1] WITH TOTALS; -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02542_transform_old.reference b/tests/queries/0_stateless/02542_transform_old.reference new file mode 100644 index 00000000000..d03b17d40a3 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_old.reference @@ -0,0 +1,72 @@ +google +other +yahoo +yandex +#1 +20 +21 +22 +29 +#2 +0 +1 +3 +5 +7 +8 +9 +20 +21 +29 +#3 +20 +21 +22 +29 +#4 +google +other +yahoo +yandex +#5 +0 +1 +3 +5 +7 +8 +9 +google +yahoo +yandex +---- +google +other +yahoo +yandex +#1 +20 +21 +22 +29 +#3 +20 +21 +22 +29 +#4 +google +other +yahoo +yandex +---- +2000 +2100 +2200 +2900 +#1 +2000 +2100 +2200 +2900 +---- diff --git a/tests/queries/0_stateless/02542_transform_old.sql b/tests/queries/0_stateless/02542_transform_old.sql new file mode 100644 index 00000000000..01a960ec367 --- /dev/null +++ b/tests/queries/0_stateless/02542_transform_old.sql @@ -0,0 +1,25 @@ +SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#2'; +SELECT transform(number, [2, 4, 6], [29, 20, 21]) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#3'; +SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], 22) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#4'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], 'other') as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#5'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo']) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#3'; +SELECT transform(toString(number), ['2', '4', '6'], [29, 20, 21], materialize(22)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#4'; +SELECT transform(toString(number), ['2', '4', '6'], ['google', 'yandex', 'yahoo'], materialize('other')) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], 2200) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '#1'; +SELECT transform(number, [2, 4, 6], [2900, 2000, 2100], materialize(2200)) as x FROM numbers(10) GROUP BY x ORDER BY x; +SELECT '----'; +SELECT transform(number, [1], [null]) FROM system.numbers LIMIT 1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02560_window_ntile.reference 
b/tests/queries/0_stateless/02560_window_ntile.reference index cae0586fa8c..1045fc1011a 100644 --- a/tests/queries/0_stateless/02560_window_ntile.reference +++ b/tests/queries/0_stateless/02560_window_ntile.reference @@ -22,7 +22,28 @@ select a, b, ntile(3) over (partition by a order by b rows between unbounded pre 1 7 3 1 8 3 1 9 3 -select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(3) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +0 0 1 +0 1 1 +0 2 1 +0 3 1 +0 4 2 +0 5 2 +0 6 2 +0 7 3 +0 8 3 +0 9 3 +1 0 1 +1 1 1 +1 2 1 +1 3 1 +1 4 2 +1 5 2 +1 6 2 +1 7 3 +1 8 3 +1 9 3 +select a, b, ntile(2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 1 0 2 1 @@ -43,7 +64,7 @@ select a, b, ntile(2) over (partition by a order by b rows between unbounded pre 1 7 2 1 8 2 1 9 2 -select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 1 0 2 1 @@ -64,7 +85,7 @@ select a, b, ntile(1) over (partition by a order by b rows between unbounded pre 1 7 1 1 8 1 1 9 1 -select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(100) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); 0 0 1 0 1 2 0 2 3 @@ -85,7 +106,7 @@ select a, b, ntile(100) over (partition by a order by b rows between unbounded p 1 7 8 1 8 9 1 9 10 -select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100; +select a, b, ntile(65535) over (partition by a order by b) from (select 1 as a, number as b from numbers(65535)) limit 100; 1 0 1 1 1 2 1 2 3 @@ -187,11 +208,11 @@ select a, b, ntile(65535) over (partition by a order by b rows between unbounded 1 98 99 1 99 100 -- Bad arguments -select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(-2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(3.0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from 
numbers(20)); -- { serverError 36 } +select a, b, ntile('2') over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } select a, b, ntile(2) over (partition by a order by b rows between 4 preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02560_window_ntile.sql b/tests/queries/0_stateless/02560_window_ntile.sql index 4c25ecf4dd2..f2acf8fc94e 100644 --- a/tests/queries/0_stateless/02560_window_ntile.sql +++ b/tests/queries/0_stateless/02560_window_ntile.sql @@ -2,17 +2,20 @@ -- Normal cases select a, b, ntile(3) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(2) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(100) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -select a, b, ntile(65535) over (partition by a order by b rows between unbounded preceding and unbounded following) from (select 1 as a, number as b from numbers(65535)) limit 100; +select a, b, ntile(3) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(100) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); +select a, b, ntile(65535) over (partition by a order by b) from (select 1 as a, number as b from numbers(65535)) limit 100; + + -- Bad arguments -select a, b, ntile(3.0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile('2') over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(0) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(-2) over (partition by a order by b rows between unbounded 
preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -select a, b, ntile(b + 1) over (partition by a order by b rows between unbounded preceding and unbounded following) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(3.0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile('2') over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(0) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(-2) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } +select a, b, ntile(b + 1) over (partition by a order by b) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } -- Bad window type select a, b, ntile(2) over (partition by a) from(select intDiv(number,10) as a, number%10 as b from numbers(20)); -- { serverError 36 } diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference index 18e83d1244a..452e0e0801e 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.reference @@ -17,3 +17,42 @@ SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORD 8000 all_2_2_0_6 8000 all_3_3_0_6 8000 all_4_4_0_6 +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +28000 +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +24000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +20000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +16000 +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE 
description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; +12000 diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql index fc90582d20e..7b52a89b16f 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks.sql @@ -1,12 +1,12 @@ DROP TABLE IF EXISTS 02581_trips; -CREATE TABLE 02581_trips(id UInt32, description String) ENGINE=MergeTree ORDER BY id; +CREATE TABLE 02581_trips(id UInt32, id2 UInt32, description String) ENGINE=MergeTree ORDER BY id; -- Make multiple parts -INSERT INTO 02581_trips SELECT number, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+10000, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+20000, '' FROM numbers(10000); -INSERT INTO 02581_trips SELECT number+30000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number, number, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+10000, number+10000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+20000, number+20000, '' FROM numbers(10000); +INSERT INTO 02581_trips SELECT number+30000, number+30000, '' FROM numbers(10000); -- { echoOn } SELECT count(), _part FROM 02581_trips GROUP BY _part ORDER BY _part; @@ -16,6 +16,45 @@ ALTER TABLE 02581_trips UPDATE description='1' WHERE id IN (SELECT (number*10+1) SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; ALTER TABLE 02581_trips UPDATE description='2' WHERE id IN (SELECT (number*10+2)::UInt32 FROM numbers(10000)) SETTINGS mutations_sync=2; SELECT count(), _part FROM 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part; + +-- Run mutation with `id 'IN big subquery' +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +ALTER TABLE 02581_trips UPDATE description='a' WHERE id IN (SELECT (number*10 + 1)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2, max_rows_in_set=1000; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with func(`id`) IN big subquery +ALTER TABLE 02581_trips UPDATE description='b' WHERE id::UInt64 IN (SELECT (number*10 + 2)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with non-PK `id2` IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' WHERE id2 IN (SELECT (number*10 + 3)::UInt32 FROM numbers(10000000)) SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) OR + (id2 IN (SELECT (number*10 + 4)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM numbers(10000000))) OR + (id2::UInt64 IN (SELECT (number*10 + 5)::UInt32 FROM 
numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; + +-- Run mutation with PK and non-PK IN big subquery +ALTER TABLE 02581_trips UPDATE description='c' +WHERE + (id::UInt32 IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) OR + ((id2+1)::String IN (SELECT (number*10 + 6)::UInt32 FROM numbers(10000000))) +SETTINGS mutations_sync=2; +SELECT count() from 02581_trips WHERE description = ''; -- { echoOff } DROP TABLE 02581_trips; diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql index 97cf979e80a..21ff453cd8e 100644 --- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql +++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql @@ -1,4 +1,4 @@ --- Tags: long, no-tsan, no-asan, no-ubsan, no-msan +-- Tags: long, no-debug, no-tsan, no-asan, no-ubsan, no-msan DROP TABLE IF EXISTS 02581_trips; diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.reference b/tests/queries/0_stateless/02596_build_set_and_remote.reference new file mode 100644 index 00000000000..8d12196ae33 --- /dev/null +++ b/tests/queries/0_stateless/02596_build_set_and_remote.reference @@ -0,0 +1,19 @@ +-- {echoOn} +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); +1 +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; +1 +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; +1 +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; +1 +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; +1000.0001 \N 0 +1000.0001 257 0 +1000.0001 65536 0 +1000.0001 \N 0 diff --git a/tests/queries/0_stateless/02596_build_set_and_remote.sql b/tests/queries/0_stateless/02596_build_set_and_remote.sql new file mode 100644 index 00000000000..7a904344c91 --- /dev/null +++ b/tests/queries/0_stateless/02596_build_set_and_remote.sql @@ -0,0 +1,14 @@ +-- {echoOn} +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM system.one; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one); + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY NULL; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 1; + +SELECT arrayExists(x -> (x IN (SELECT '2')), [2]) FROM remote('127.0.0.{2,3}', system.one) GROUP BY 'A'; + +SELECT 1 IN ( SELECT 1 ) FROM remote('127.0.0.{1,2}', system.one) GROUP BY dummy; + +SELECT 1000.0001, toUInt64(arrayJoin([NULL, 257, 65536, NULL])), arrayExists(x -> (x IN (SELECT '2.55')), [-9223372036854775808]) FROM remote('127.0.0.{1,2}', system.one) GROUP BY NULL, NULL, NULL, NULL; diff --git a/tests/queries/0_stateless/02661_quantile_approx.reference b/tests/queries/0_stateless/02661_quantile_approx.reference index f4e66adc8d9..8369363aa9b 100644 --- a/tests/queries/0_stateless/02661_quantile_approx.reference +++ 
b/tests/queries/0_stateless/02661_quantile_approx.reference @@ -19,8 +19,10 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe [99,199,249,313,776] select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); [100,200,250,314,777] -select medianGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } -select quantileGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select medianGK(100)(number) from numbers(10); 4 select quantileGK(100)(number) from numbers(10); @@ -31,7 +33,8 @@ select quantileGK(100, 0.5, 0.75)(number) from numbers(10); -- { serverError NUM select quantileGK('abc', 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } -select quantilesGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select quantilesGK(100, 0.5)(number) from numbers(10); [4] diff --git a/tests/queries/0_stateless/02661_quantile_approx.sql b/tests/queries/0_stateless/02661_quantile_approx.sql index 18c2e5de84b..52c2979ad44 100644 --- a/tests/queries/0_stateless/02661_quantile_approx.sql +++ b/tests/queries/0_stateless/02661_quantile_approx.sql @@ -1,3 +1,5 @@ +set allow_experimental_analyzer = 1; + -- { echoOn } with arrayJoin([0, 1, 2, 10]) as x select quantilesGK(100, 0.5, 0.4, 0.1)(x); with arrayJoin([0, 6, 7, 9, 10]) as x select quantileGK(100, 0.5)(x); @@ -14,8 +16,12 @@ select quantilesGK(1000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(numbe select quantilesGK(10000, 100/1000, 200/1000, 250/1000, 314/1000, 777/1000)(number + 1) from numbers(1000); -select medianGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } -select quantileGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select medianGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantileGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + select medianGK(100)(number) from numbers(10); select quantileGK(100)(number) from numbers(10); select quantileGK(100, 0.5)(number) from numbers(10); @@ -24,7 +30,9 @@ select quantileGK('abc', 
0.5)(number) from numbers(10); -- { serverError ILLEGAL select quantileGK(1.23, 0.5)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } select quantileGK(-100, 0.5)(number) from numbers(10); -- { serverError BAD_ARGUMENTS } -select quantilesGK()(number) from numbers(10); -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 0; -- { serverError BAD_ARGUMENTS } +select quantilesGK()(number) from numbers(10) SETTINGS allow_experimental_analyzer = 1; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + select quantilesGK(100)(number) from numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } select quantilesGK(100, 0.5)(number) from numbers(10); select quantilesGK('abc', 0.5, 0.75)(number) from numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql index 4af06634c66..f0f9845d91d 100644 --- a/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql +++ b/tests/queries/0_stateless/02677_analyzer_bitmap_has_any.sql @@ -18,7 +18,7 @@ FROM bitmapHasAny(bitmapBuild([toUInt64(1)]), ( SELECT groupBitmapState(toUInt64(2)) )) has2 -); -- { serverError 43 } +) SETTINGS allow_experimental_analyzer = 0; -- { serverError 43 } SELECT '--------------'; diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql index 5b0530e05ae..bde91df83ca 100644 --- a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql +++ b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql @@ -1,2 +1,4 @@ +CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; + SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } --- SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } +SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } diff --git a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference index 346025b277b..35c94347ac9 100644 --- a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference +++ b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.reference @@ -2,6 +2,8 @@ [] [[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] [[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] +[[(2147483647,0),(10.0001,65535),(1,255),(1023,2147483646)]] [[[(2147483647,0),(10.0001,65535),(1023,2147483646),(2147483647,0)]]] [[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]] [[[(100.0001,1000.0001),(1000.0001,1.1920928955078125e-7),(20,-20),(20,20),(10,10),(-20,20),(100.0001,1000.0001)]]] [(9223372036854775807,1.1754943508222875e-38)] [[(1,1.0001)]] \N [] 
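The duplicated result rows added to the reference above come from running the same query once per analyzer mode, which the .sql hunk below makes explicit. As an illustrative sketch of the pattern only (the statement is borrowed from this test; appending the settings to this particular statement is done here purely for demonstration and is not part of the PR):

SELECT polygonsSymDifferenceCartesian([[[(1., 1.)]] AS x], [x]) GROUP BY x WITH ROLLUP SETTINGS allow_experimental_analyzer = 0;
SELECT polygonsSymDifferenceCartesian([[[(1., 1.)]] AS x], [x]) GROUP BY x WITH ROLLUP SETTINGS allow_experimental_analyzer = 1;

Because both runs are expected to produce identical rows, the .reference file lists the result set twice, once per setting.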
diff --git a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql index 8b9b63f7996..85307bec6e5 100644 --- a/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql +++ b/tests/queries/0_stateless/02699_polygons_sym_difference_rollup.sql @@ -1,5 +1,5 @@ - SELECT polygonsSymDifferenceCartesian([[[(1., 1.)]] AS x], [x]) GROUP BY x WITH ROLLUP; -SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP; +SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=0; +SELECT [[(2147483647, 0.), (10.0001, 65535), (1, 255), (1023, 2147483646)]], polygonsSymDifferenceCartesian([[[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]]], [[[(1000.0001, 10.0001)]]]) GROUP BY [[(2147483647, 0.), (10.0001, 65535), (1023, 2147483646)]] WITH ROLLUP SETTINGS allow_experimental_analyzer=1; SELECT polygonsSymDifferenceCartesian([[[(100.0001, 1000.0001), (-20., 20.), (10., 10.), (20., 20.), (20., -20.), (1000.0001, 1.1920928955078125e-7)]],[[(0.0001, 100000000000000000000.)]] AS x],[x]) GROUP BY x WITH ROLLUP; SELECT [(9223372036854775807, 1.1754943508222875e-38)], x, NULL, polygonsSymDifferenceCartesian([[[(1.1754943508222875e-38, 1.1920928955078125e-7), (0.5, 0.5)]], [[(1.1754943508222875e-38, 1.1920928955078125e-7), (1.1754943508222875e-38, 1.1920928955078125e-7)], [(0., 1.0001)]], [[(1., 1.0001)]] AS x], [[[(3.4028234663852886e38, 0.9999)]]]) GROUP BY GROUPING SETS ((x)) WITH TOTALS diff --git a/tests/queries/0_stateless/02703_jit_external_aggregation.reference b/tests/queries/0_stateless/02703_jit_external_aggregation.reference index cdeec60f4ef..9c558e357c4 100644 --- a/tests/queries/0_stateless/02703_jit_external_aggregation.reference +++ b/tests/queries/0_stateless/02703_jit_external_aggregation.reference @@ -1 +1 @@ -..... +. diff --git a/tests/queries/0_stateless/02703_jit_external_aggregation.sh b/tests/queries/0_stateless/02703_jit_external_aggregation.sh index 2d1dda45de0..4bc17c106fb 100755 --- a/tests/queries/0_stateless/02703_jit_external_aggregation.sh +++ b/tests/queries/0_stateless/02703_jit_external_aggregation.sh @@ -5,11 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# This query should return empty result in every of five runs: - -for _ in {1..5} -do - $CLICKHOUSE_CLIENT --compile_aggregate_expressions 0 --query " +# This query should return empty result +$CLICKHOUSE_CLIENT --compile_aggregate_expressions 1 --min_count_to_compile_aggregate_expression=0 --query " SELECT COUNT() AS c, group_key, @@ -30,6 +27,5 @@ ORDER BY group_key ASC LIMIT 10 SETTINGS max_bytes_before_external_group_by = 200000 " && echo -n '.' 
-done echo diff --git a/tests/queries/0_stateless/02707_complex_query_fails_analyzer.reference b/tests/queries/0_stateless/02707_complex_query_fails_analyzer.reference deleted file mode 100644 index 192f8aa904a..00000000000 --- a/tests/queries/0_stateless/02707_complex_query_fails_analyzer.reference +++ /dev/null @@ -1,10 +0,0 @@ -1 1 -59.952 -1 2 59.952 -1 3 -100 -2 1 -93.7611 -2 2 93.7611 -3 1 0 -3 2 0 ---------- -0 -0 diff --git a/tests/queries/0_stateless/02707_complex_query_fails_analyzer.sql b/tests/queries/0_stateless/02707_complex_query_fails_analyzer.sql deleted file mode 100644 index a9d83479d50..00000000000 --- a/tests/queries/0_stateless/02707_complex_query_fails_analyzer.sql +++ /dev/null @@ -1,117 +0,0 @@ -DROP TABLE IF EXISTS srv_account_parts; -DROP TABLE IF EXISTS etl_batch; - -CREATE TABLE srv_account_parts( - shard_num UInt16, - account_ids Array(Int64) -)ENGINE = ReplacingMergeTree -ORDER BY shard_num -as select * from values ((0,[]),(1,[1,2,3]),(2,[1,2,3]),(3,[1])); - -CREATE TABLE etl_batch( - batch_id UInt64, - batch_start DateTime, - batch_start_day Date DEFAULT toDate(batch_start), - batch_load DateTime, - total_num_records UInt32, - etl_server_id Int32, - account_id UInt64, - shard_num UInt16 -)ENGINE = ReplacingMergeTree -PARTITION BY toYYYYMM(batch_start_day) -ORDER BY (batch_id, etl_server_id, account_id); - -insert into etl_batch(batch_id, batch_start, batch_load, total_num_records, etl_server_id, account_id, shard_num) -select number batch_id, - toDateTime('2022-01-01') + INTERVAL 23 HOUR batch_start, - batch_start batch_load, - 333 total_num_records, - 1 etl_server_id, - number%3+1 account_id, - 1 shard_num -from numbers(1000); - -insert into etl_batch(batch_id, batch_start, batch_load, total_num_records, etl_server_id, account_id, shard_num) -select number+2000 batch_id, - toDateTime('2022-01-01') + INTERVAL 23 HOUR batch_start, - batch_start batch_load, - 333 total_num_records, - 1 etl_server_id, - number%3+1 account_id, - 2 shard_num -from numbers(1000); - -insert into etl_batch(batch_id, batch_start, batch_load, total_num_records, etl_server_id, account_id, shard_num) -select number+4000 batch_id, - toDateTime('2022-01-01') + INTERVAL 3 HOUR batch_start, - batch_start batch_load, - 3333 total_num_records, - 1 etl_server_id, - 2 account_id, - 2 shard_num -from numbers(1000); - -insert into etl_batch(batch_id, batch_start, batch_load, total_num_records, etl_server_id, account_id, shard_num) -select number+6000 batch_id, - toDateTime('2022-01-01') + INTERVAL 23 HOUR batch_start, - batch_start batch_load, - 333 total_num_records, - 1 etl_server_id, - 1 account_id, - 2 shard_num -from numbers(1000); - -insert into etl_batch(batch_id, batch_start, batch_load, total_num_records, etl_server_id, account_id, shard_num) -select number+8000 batch_id, - toDateTime('2022-01-01') + INTERVAL 23 HOUR batch_start, - batch_start batch_load, - 1000 total_num_records, - 1 etl_server_id, - 3 account_id, - 3 shard_num -from numbers(1000); - -CREATE OR REPLACE VIEW v_num_records_by_node_bias_acc as -SELECT shard_num, - arrayJoin(account_ids) AS account_id, - records_24h, - records_12h, - IF (b = '',-100,xbias) AS bias, - IF (bias > 10,0,IF (bias > 0,1,IF (bias < -10,301,300))) AS sbias -FROM srv_account_parts - LEFT JOIN (SELECT account_id, - shard_num, - records_24h, - records_12h, - xbias, - 'b' AS b - FROM (SELECT account_id, - groupArray((shard_num,records_24h,records_12h)) AS ga, - arraySum(ga.2) AS tot24, - arraySum(ga.3) AS tot12, - arrayMap(i 
->(((((i.2)*LENGTH(ga))*100) / tot24) - 100),ga) AS bias24, - arrayMap(i ->(((((i.3)*LENGTH(ga))*100) / tot12) - 100),ga) AS bias12, - arrayMap((i,j,k) ->(i,IF (tot12 = 0,0,IF (ABS(j) > ABS(k),j,k))),ga,bias24,bias12) AS a_bias - FROM (SELECT shard_num, - toInt64(account_id) AS account_id, - SUM(total_num_records) AS records_24h, - sumIf(total_num_records,batch_load >(toDateTime('2022-01-02') -(3600*12))) AS records_12h - FROM etl_batch FINAL PREWHERE (batch_start_day >= (toDate('2022-01-02') - 2)) AND (batch_load > (toDateTime('2022-01-02') - (3600*24))) - where (shard_num, account_id) in (select shard_num, arrayJoin(account_ids) from srv_account_parts) - GROUP BY shard_num,account_id) - GROUP BY account_id) - ARRAY JOIN (a_bias.1).1 AS shard_num,a_bias.2 AS xbias, (a_bias.1).2 AS records_24h, (a_bias.1).3 AS records_12h - ) s USING (shard_num,account_id); - -select account_id, shard_num, round(bias,4) -from v_num_records_by_node_bias_acc -order by account_id, shard_num, bias; - -select '---------'; - -SELECT a AS b FROM (SELECT 0 a) s LEFT JOIN (SELECT 0 b) t USING (b); - -SELECT arrayJoin(a) AS b FROM (SELECT [0] a) s LEFT JOIN (SELECT 0 b) t USING (b); - -DROP TABLE srv_account_parts; -DROP TABLE etl_batch; diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.reference b/tests/queries/0_stateless/02710_allow_suspicious_indices.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02710_allow_suspicious_indices.sql b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql new file mode 100644 index 00000000000..78d52f7bc72 --- /dev/null +++ b/tests/queries/0_stateless/02710_allow_suspicious_indices.sql @@ -0,0 +1,22 @@ +-- Check CREATE TABLE + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY (id + 1, id + 1); -- { serverError BAD_ARGUMENTS } +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY (id + 1, id + 1) SETTINGS allow_suspicious_indices = 1; + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32, INDEX idx (id + 1, id + 1) TYPE minmax) ENGINE = MergeTree() ORDER BY id; -- { serverError BAD_ARGUMENTS } +CREATE TABLE tbl (id UInt32, INDEX idx (id + 1, id + 1) TYPE minmax) ENGINE = MergeTree() ORDER BY id SETTINGS allow_suspicious_indices = 1; + +-- Check ALTER TABLE + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id1 UInt32) ENGINE = MergeTree() ORDER BY id1; +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id2); -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id1); -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD COLUMN `id2` UInt32, MODIFY ORDER BY (id1, id2, id2) SETTINGS allow_suspicious_indices = 1; + +DROP TABLE IF EXISTS tbl; +CREATE TABLE tbl (id UInt32) ENGINE = MergeTree() ORDER BY id; +ALTER TABLE tbl ADD INDEX idx (id+1, id, id+1) TYPE minmax; -- { serverError BAD_ARGUMENTS } +ALTER TABLE tbl ADD INDEX idx (id+1, id, id+1) TYPE minmax SETTINGS allow_suspicious_indices = 1; diff --git a/tests/queries/0_stateless/02711_server_uuid_macro.sql b/tests/queries/0_stateless/02711_server_uuid_macro.sql index f10ed7f8f6f..4f562ad36bf 100644 --- a/tests/queries/0_stateless/02711_server_uuid_macro.sql +++ b/tests/queries/0_stateless/02711_server_uuid_macro.sql @@ -12,4 +12,4 @@ CREATE TABLE test2 (x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{d -- The macro {server_uuid} is special, not a configuration-type macro. It's normal that it is inaccessible with the getMacro function. 
SELECT getMacro('server_uuid'); -- { serverError NO_ELEMENTS_IN_CONFIG } -DROP TABLE test NO DELAY; +DROP TABLE test SYNC; diff --git a/tests/queries/0_stateless/02713_create_user_substitutions.reference b/tests/queries/0_stateless/02713_create_user_substitutions.reference new file mode 100644 index 00000000000..f9b5cc495b5 --- /dev/null +++ b/tests/queries/0_stateless/02713_create_user_substitutions.reference @@ -0,0 +1,11 @@ +1 +2 +3 +4 +5 +6 +7 +8 +CREATE USER user9_02713 IDENTIFIED WITH ldap SERVER \'qwerty9\' +CREATE USER user10_02713 IDENTIFIED WITH kerberos REALM \'qwerty10\' +CREATE USER user11_02713 IDENTIFIED WITH ssl_certificate CN \'qwerty11\', \'qwerty12\' diff --git a/tests/queries/0_stateless/02713_create_user_substitutions.sh b/tests/queries/0_stateless/02713_create_user_substitutions.sh new file mode 100755 index 00000000000..42926335acb --- /dev/null +++ b/tests/queries/0_stateless/02713_create_user_substitutions.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS user1_02713, user2_02713, user3_02713, user4_02713, user5_02713, user6_02713, user7_02713"; + +$CLICKHOUSE_CLIENT --param_password=qwerty1 -q "CREATE USER user1_02713 IDENTIFIED BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty2 -q "CREATE USER user2_02713 IDENTIFIED WITH PLAINTEXT_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty3 -q "CREATE USER user3_02713 IDENTIFIED WITH SHA256_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty4 -q "CREATE USER user4_02713 IDENTIFIED WITH DOUBLE_SHA1_PASSWORD BY {password:String}"; +$CLICKHOUSE_CLIENT --param_password=qwerty5 -q "CREATE USER user5_02713 IDENTIFIED WITH BCRYPT_PASSWORD BY {password:String}"; + +# Generated online +$CLICKHOUSE_CLIENT --param_hash=310cef2caff72c0224f38ca8e2141ca6012cd4da550c692573c25a917d9a75e6 \ + -q "CREATE USER user6_02713 IDENTIFIED WITH SHA256_HASH BY {hash:String}"; +# Generated with ClickHouse +$CLICKHOUSE_CLIENT --param_hash=5886A74C452575627522F3A80D8B9E239FD8955F \ + -q "CREATE USER user7_02713 IDENTIFIED WITH DOUBLE_SHA1_HASH BY {hash:String}"; +# Generated online +$CLICKHOUSE_CLIENT --param_hash=\$2a\$12\$wuohz0HFSBBNE8huN0Yx6.kmWrefiYVKeMp4gsuNoO1rOWwF2FXXC \ + -q "CREATE USER user8_02713 IDENTIFIED WITH BCRYPT_HASH BY {hash:String}"; + +$CLICKHOUSE_CLIENT --param_server=qwerty9 -q "CREATE USER user9_02713 IDENTIFIED WITH LDAP SERVER {server:String}"; +$CLICKHOUSE_CLIENT --param_realm=qwerty10 -q "CREATE USER user10_02713 IDENTIFIED WITH KERBEROS REALM {realm:String}"; +$CLICKHOUSE_CLIENT --param_cert1=qwerty11 --param_cert2=qwerty12 -q "CREATE USER user11_02713 IDENTIFIED WITH SSL_CERTIFICATE CN {cert1:String}, {cert2:String}"; + +$CLICKHOUSE_CLIENT --user=user1_02713 --password=qwerty1 -q "SELECT 1"; +$CLICKHOUSE_CLIENT --user=user2_02713 --password=qwerty2 -q "SELECT 2"; +$CLICKHOUSE_CLIENT --user=user3_02713 --password=qwerty3 -q "SELECT 3"; +$CLICKHOUSE_CLIENT --user=user4_02713 --password=qwerty4 -q "SELECT 4"; +$CLICKHOUSE_CLIENT --user=user5_02713 --password=qwerty5 -q "SELECT 5"; +$CLICKHOUSE_CLIENT --user=user6_02713 --password=qwerty6 -q "SELECT 6"; +$CLICKHOUSE_CLIENT --user=user7_02713 --password=qwerty7 -q "SELECT 7"; +$CLICKHOUSE_CLIENT --user=user8_02713 --password=qwerty8 -q "SELECT 8"; + +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user9_02713"; 
+$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user10_02713"; +$CLICKHOUSE_CLIENT -q "SHOW CREATE USER user11_02713"; + +$CLICKHOUSE_CLIENT -q "DROP USER user1_02713, user2_02713, user3_02713, user4_02713, user5_02713, user6_02713, user7_02713, user8_02713, user9_02713, user10_02713, user11_02713"; diff --git a/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference new file mode 100644 index 00000000000..2a1c127e635 --- /dev/null +++ b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.reference @@ -0,0 +1,3 @@ +serialized state is not used 1 +serialized state is used 1 +via Distributed 1 diff --git a/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql new file mode 100644 index 00000000000..3521cb8470f --- /dev/null +++ b/tests/queries/0_stateless/02713_sequence_match_serialization_fix.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS 02713_seqt; +DROP TABLE IF EXISTS 02713_seqt_distr; + +SELECT + 'serialized state is not used', sequenceMatch('(?1)(?2)')(time, number_ = 1, number_ = 0) AS seq +FROM +( + SELECT + number AS time, + number % 2 AS number_ + FROM numbers_mt(100) +); + + +CREATE TABLE 02713_seqt +ENGINE = MergeTree +ORDER BY n AS +SELECT + sequenceMatchState('(?1)(?2)')(time, number_ = 1, number_ = 0) AS seq, + 1 AS n +FROM +( + SELECT + number AS time, + number % 2 AS number_ + FROM numbers_mt(100) +); + + +SELECT 'serialized state is used', sequenceMatchMerge('(?1)(?2)')(seq) AS seq +FROM 02713_seqt; + + +CREATE TABLE 02713_seqt_distr ( seq AggregateFunction(sequenceMatch('(?1)(?2)'), UInt64, UInt8, UInt8) , n UInt8) ENGINE = Distributed(test_shard_localhost, currentDatabase(), '02713_seqt'); + +SELECT 'via Distributed', sequenceMatchMerge('(?1)(?2)')(seq) AS seq FROM 02713_seqt_distr; diff --git a/tests/queries/0_stateless/02714_date_date32_in.reference b/tests/queries/0_stateless/02714_date_date32_in.reference new file mode 100644 index 00000000000..d9ff83f1949 --- /dev/null +++ b/tests/queries/0_stateless/02714_date_date32_in.reference @@ -0,0 +1,4 @@ +1 +1 +0 +0 diff --git a/tests/queries/0_stateless/02714_date_date32_in.sql b/tests/queries/0_stateless/02714_date_date32_in.sql new file mode 100644 index 00000000000..69a087eff6f --- /dev/null +++ b/tests/queries/0_stateless/02714_date_date32_in.sql @@ -0,0 +1,4 @@ +select toDate32('2020-01-01') in (toDate('2020-01-01')); +select toDate('2020-01-01') in (toDate32('2020-01-01')); +select toDate('2020-01-01') in 1::Int64; +select toDate32('2020-01-01') in 1::UInt64; diff --git a/tests/queries/0_stateless/02714_local_object_storage.reference b/tests/queries/0_stateless/02714_local_object_storage.reference new file mode 100644 index 00000000000..b3f28057554 --- /dev/null +++ b/tests/queries/0_stateless/02714_local_object_storage.reference @@ -0,0 +1,2 @@ +1 test +1 test diff --git a/tests/queries/0_stateless/02714_local_object_storage.sql b/tests/queries/0_stateless/02714_local_object_storage.sql new file mode 100644 index 00000000000..fa9025b8b6e --- /dev/null +++ b/tests/queries/0_stateless/02714_local_object_storage.sql @@ -0,0 +1,28 @@ +SET min_bytes_to_use_direct_io='1Gi'; -- It does not work (fixme) +SET local_filesystem_read_method='pread'; -- ui_uring local_fs_method does not work here (fixme) + +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk( + type = 
'local_blob_storage', + path = '/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/'); + +INSERT INTO test SELECT 1, 'test'; +SELECT * FROM test; + +DROP TABLE test SYNC; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk( + type = 'cache', + max_size = '10Mi', + path = '/var/lib/clickhouse/caches/${CLICKHOUSE_TEST_UNIQUE_NAME}/', + disk = disk(type='local_blob_storage', path='/var/lib/clickhouse/disks/${CLICKHOUSE_TEST_UNIQUE_NAME}/')); + +INSERT INTO test SELECT 1, 'test'; +SELECT * FROM test; + +DROP TABLE test SYNC; diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference new file mode 100644 index 00000000000..d315d85a11e --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.reference @@ -0,0 +1,6 @@ +UInt64 1 8 +UInt64 10 80 +UInt64 1000 8000 +AggregateFunction(argMax, String, DateTime) 1 80 +AggregateFunction(argMax, String, DateTime) 10 800 +AggregateFunction(argMax, String, DateTime) 1000 80000 diff --git a/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql new file mode 100644 index 00000000000..26bc9ebe62b --- /dev/null +++ b/tests/queries/0_stateless/02714_read_bytes_aggregateFunction.sql @@ -0,0 +1,59 @@ +CREATE TABLE test (id UInt64, `amax` AggregateFunction(argMax, String, DateTime)) +ENGINE=MergeTree() +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization=1 -- Sparse columns will take more bytes for a single row +AS + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(1) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test + SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) + FROM numbers(9) + GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +INSERT INTO test +SELECT number, argMaxState(number::String, '2023-04-12 16:23:01'::DateTime) +FROM numbers(990) +GROUP BY number; + +SELECT sum(id) FROM test FORMAT Null; +SELECT argMaxMerge(amax) FROM test FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'UInt64', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT sum(id) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; + +-- Size of ColumnAggregateFunction: Number of pointers * pointer size + arena size +-- 1 * 8 + AggregateFunction(argMax, String, DateTime) +-- +-- Size of AggregateFunction(argMax, String, DateTime): +-- SingleValueDataString() + SingleValueDataFixed(DateTime) +-- SingleValueDataString = 64B for small strings, 64B + string size + 1 for larger +-- SingleValueDataFixed(DateTime) = 1 + 4. 
With padding = 8 +-- SingleValueDataString Total: 72B +-- +-- ColumnAggregateFunction total: 8 + 72 = 80 +SELECT 'AggregateFunction(argMax, String, DateTime)', + read_rows, + read_bytes +FROM system.query_log +WHERE + current_database = currentDatabase() AND + query = 'SELECT argMaxMerge(amax) FROM test FORMAT Null;' AND + type = 2 AND event_date >= yesterday() +ORDER BY event_time_microseconds; diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference new file mode 100644 index 00000000000..360b484bf28 --- /dev/null +++ b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference @@ -0,0 +1,4 @@ +Size: 6000001 +Size: 6000001 +Size: 6000001 +Size: 2971517 diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh new file mode 100755 index 00000000000..69e2f734914 --- /dev/null +++ b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, long +# Tag no-fasttest: requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +in="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.in" +out="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.out" +log="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.log" + +set -e +trap 'rm -f "${out:?}" "${in:?}" "${log:?}"' EXIT + +# Generate a file of 20MiB in size, with our part size it will have 4 parts +# NOTE: 1 byte is for new line, so 1023 not 1024 +$CLICKHOUSE_LOCAL -q "SELECT randomPrintableASCII(1023) FROM numbers(20*1024) FORMAT LineAsString" > "$in" + +$CLICKHOUSE_CLIENT --send_logs_level=trace --server_logs_file="$log" -q "INSERT INTO FUNCTION s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" --s3_strict_upload_part_size=6000001 < "$in" +grep -F '' "$log" || : +grep -o 'WriteBufferFromS3: Writing part.*Size: .*' "$log" | grep -o 'Size: .*' +$CLICKHOUSE_CLIENT -q "SELECT * FROM s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" > "$out" + +diff -q "$in" "$out" diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference new file mode 100644 index 00000000000..6f9b4b4fc6a --- /dev/null +++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.reference @@ -0,0 +1,7 @@ +-- { echoOn } +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 < 5) FROM dummy GROUP BY num2; +0 +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 >= 5) FROM dummy GROUP BY num2; +5 diff --git a/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql new file mode 100644 index 00000000000..04e0fc5e0ba --- /dev/null +++ b/tests/queries/0_stateless/02723_jit_aggregation_bug_48120.sql @@ -0,0 +1,17 @@ +-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64 + +drop table if exists dummy; +CREATE TABLE dummy ( num1 Int32, num2 Enum8('foo' = 0, 'bar' = 1, 'tar' = 2) ) +ENGINE = MergeTree ORDER BY num1 as select 5, 'bar'; + +set compile_aggregate_expressions=1; +set min_count_to_compile_aggregate_expression=0; + +-- { echoOn } +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT minIf(num1, num1 < 5) FROM dummy GROUP BY num2; +SYSTEM DROP COMPILED EXPRESSION CACHE; +SELECT 
minIf(num1, num1 >= 5) FROM dummy GROUP BY num2; +-- { echoOff } + +drop table dummy; diff --git a/tests/queries/0_stateless/02723_parallelize_output_setting.reference b/tests/queries/0_stateless/02723_parallelize_output_setting.reference new file mode 100644 index 00000000000..0f2a396f471 --- /dev/null +++ b/tests/queries/0_stateless/02723_parallelize_output_setting.reference @@ -0,0 +1,7 @@ +-- { echoOn } +set parallelize_output_from_storages=1; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; +1 +-- no Resize in pipeline +set parallelize_output_from_storages=0; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; diff --git a/tests/queries/0_stateless/02723_parallelize_output_setting.sql b/tests/queries/0_stateless/02723_parallelize_output_setting.sql new file mode 100644 index 00000000000..7db28ca4dec --- /dev/null +++ b/tests/queries/0_stateless/02723_parallelize_output_setting.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel + +insert into function file(data_02723.csv) select number from numbers(5) settings engine_file_truncate_on_insert=1; + +set max_threads=2; +-- { echoOn } +set parallelize_output_from_storages=1; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; +-- no Resize in pipeline +set parallelize_output_from_storages=0; +select startsWith(trimLeft(explain),'Resize') as resize from (explain pipeline select * from file(data_02723.csv)) where resize; + diff --git a/tests/queries/0_stateless/02723_zookeeper_name.reference b/tests/queries/0_stateless/02723_zookeeper_name.reference new file mode 100644 index 00000000000..074712bd8fe --- /dev/null +++ b/tests/queries/0_stateless/02723_zookeeper_name.reference @@ -0,0 +1,4 @@ +Create Tables +Insert Data +"t1","default",1 +"t2","default",1 diff --git a/tests/queries/0_stateless/02723_zookeeper_name.sql b/tests/queries/0_stateless/02723_zookeeper_name.sql new file mode 100644 index 00000000000..7ddbf4edd47 --- /dev/null +++ b/tests/queries/0_stateless/02723_zookeeper_name.sql @@ -0,0 +1,23 @@ +-- Tags: zookeeper, replica + +SELECT 'Create Tables'; +CREATE TABLE t1(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02723/zookeeper_name/t1', '1') ORDER BY k; + +CREATE TABLE t2(k UInt32, v UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_02723/zookeeper_name/t2', '1') ORDER BY k; + +SELECT 'Insert Data'; + +INSERT INTO t1 SELECT * FROM generateRandom('k UInt32, v UInt32') LIMIT 1; +INSERT INTO t2 SELECT * FROM generateRandom('k UInt32, v UInt32') LIMIT 1; + +SELECT + table,zookeeper_name,count() +FROM system.replicas +INNER JOIN system.parts USING (database, table) +WHERE database = currentDatabase() +GROUP BY + table,zookeeper_name +FORMAT CSV; + +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.reference b/tests/queries/0_stateless/02724_decompress_filename_exception.reference new file mode 100644 index 00000000000..f9c5aacff7b --- /dev/null +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.reference @@ -0,0 +1,8 @@ +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh new file mode 100755 index 00000000000..bbc2b8d066b --- /dev/null +++ 
b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" + +echo 'corrupted file' > $FILENAME; + +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'gzip')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'deflate')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'br')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'xz')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'zstd')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'lz4')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'bz2')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'snappy')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; + +rm $FILENAME; diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql new file mode 100644 index 00000000000..13dfb5debe7 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -0,0 +1,8 @@ +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id diff --git a/tests/queries/0_stateless/02724_jit_logical_functions.reference b/tests/queries/0_stateless/02724_jit_logical_functions.reference new file mode 100644 index 00000000000..673ffe02613 --- /dev/null +++ b/tests/queries/0_stateless/02724_jit_logical_functions.reference @@ -0,0 +1,18 @@ +Logical functions not null +0 0 0 0 0 +0 1 0 1 1 +1 0 0 1 1 +1 1 1 1 0 +Logical functions nullable +0 0 0 0 0 +0 1 0 1 1 +1 0 0 1 1 +1 1 1 1 0 +0 \N 0 \N \N +1 \N \N 1 \N +0 0 0 +1 1 0 +0 0 0 +1 1 0 +\N \N \N +\N \N \N diff --git a/tests/queries/0_stateless/02724_jit_logical_functions.sql b/tests/queries/0_stateless/02724_jit_logical_functions.sql new file mode 100644 index 00000000000..fe6646337d0 --- /dev/null +++ 
b/tests/queries/0_stateless/02724_jit_logical_functions.sql @@ -0,0 +1,21 @@ +SET compile_expressions = 1; +SET min_count_to_compile_expression = 0; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (a UInt8, b UInt8) ENGINE = TinyLog; +INSERT INTO test_table VALUES (0, 0), (0, 1), (1, 0), (1, 1); + +SELECT 'Logical functions not null'; +SELECT a, b, and(a, b), or(a, b), xor(a, b) FROM test_table; + +DROP TABLE test_table; + +DROP TABLE IF EXISTS test_table_nullable; +CREATE TABLE test_table_nullable (a UInt8, b Nullable(UInt8)) ENGINE = TinyLog; +INSERT INTO test_table_nullable VALUES (0, 0), (0, 1), (1, 0), (1, 1), (0, NULL), (1, NULL); + +SELECT 'Logical functions nullable'; +SELECT a, b, and(a, b), or(a, b), xor(a, b) FROM test_table_nullable; +SELECT and(b, b), or(b, b), xor(b, b) FROM test_table_nullable; + +DROP TABLE test_table_nullable; diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.reference b/tests/queries/0_stateless/02724_mutliple_storage_join.reference new file mode 100644 index 00000000000..f7eb44d66e0 --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.reference @@ -0,0 +1,6 @@ +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.sql b/tests/queries/0_stateless/02724_mutliple_storage_join.sql new file mode 100644 index 00000000000..286e867704d --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.sql @@ -0,0 +1,21 @@ +CREATE TABLE user(id UInt32, name String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO user VALUES (1,'U1')(2,'U2')(3,'U3'); + +CREATE TABLE product(id UInt32, name String, cate String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO product VALUES (1,'P1','C1')(2,'P2','C1')(3,'P3','C2'); + +CREATE TABLE order(id UInt32, pId UInt32, uId UInt32) ENGINE = TinyLog; +INSERT INTO order VALUES (1,1,1)(2,1,2)(3,2,3); + +SELECT ignore(*) FROM ( + SELECT + uId, + user.id as `uuu` + FROM order + LEFT ANY JOIN user + ON uId = `uuu` +); + +SELECT ignore(*) FROM order +LEFT ANY JOIN user ON uId = user.id +LEFT ANY JOIN product ON pId = product.id; diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference new file mode 100644 index 00000000000..e6b95502e1e --- /dev/null +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.reference @@ -0,0 +1,2 @@ + ReadFromMergeTree (p1) + Granules: 1/12 diff --git a/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql new file mode 100644 index 00000000000..a2355f78f4c --- /dev/null +++ b/tests/queries/0_stateless/02725_agg_projection_resprect_PK.sql @@ -0,0 +1,32 @@ +-- Tags: no-random-merge-tree-settings + +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 +( + c1 Int64, + c2 Int64, + c3 Int64, + PROJECTION p1 + ( + SELECT + c1, + c2, + sum(c3) + GROUP BY + c2, + c1 + ) +) +ENGINE = MergeTree ORDER BY (c1, c2) settings min_bytes_for_wide_part = 10485760, min_rows_for_wide_part = 0; + +INSERT INTO t0 SELECT + number, + -number, + number +FROM numbers_mt(1e5); + +select * from (EXPLAIN indexes = 1 SELECT c1, sum(c3) FROM t0 GROUP BY c1) where explain like '%ReadFromMergeTree%'; +select * from (EXPLAIN indexes = 1 SELECT c1, sum(c3) FROM t0 WHERE c1 = 100 GROUP BY c1) where explain like '%Granules%'; + +DROP TABLE t0; diff --git a/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.reference 
b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql new file mode 100644 index 00000000000..083a3aefdaf --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_columns_should_not_allow_compression_codec.sql @@ -0,0 +1,7 @@ +drop table if exists alias_column_should_not_allow_compression; +create table if not exists alias_column_should_not_allow_compression ( user_id UUID, user_id_hashed ALIAS (cityHash64(user_id))) engine=MergeTree() order by tuple(); +create table if not exists alias_column_should_not_allow_compression_fail ( user_id UUID, user_id_hashed ALIAS (cityHash64(user_id)) codec(LZ4HC(1))) engine=MergeTree() order by tuple(); -- { serverError BAD_ARGUMENTS } +alter table alias_column_should_not_allow_compression modify column user_id codec(LZ4HC(1)); +alter table alias_column_should_not_allow_compression modify column user_id_hashed codec(LZ4HC(1)); -- { serverError BAD_ARGUMENTS } +alter table alias_column_should_not_allow_compression add column user_id_hashed_1 UInt64 ALIAS (cityHash64(user_id)) codec(LZ4HC(1)); -- { serverError BAD_ARGUMENTS } +drop table if exists alias_column_should_not_allow_compression; diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql new file mode 100644 index 00000000000..6df0e856061 --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql @@ -0,0 +1 @@ +SELECT 1 `array`, 2 "union"; diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.reference b/tests/queries/0_stateless/02725_async_insert_table_setting.reference new file mode 100644 index 00000000000..5f5235c569f --- /dev/null +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.reference @@ -0,0 +1,4 @@ +2 +2 +default.t_mt_async_insert 1 +default.t_mt_sync_insert 0 diff --git a/tests/queries/0_stateless/02725_async_insert_table_setting.sh b/tests/queries/0_stateless/02725_async_insert_table_setting.sh new file mode 100755 index 00000000000..13911e8d677 --- /dev/null +++ b/tests/queries/0_stateless/02725_async_insert_table_setting.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -n --query " +DROP TABLE IF EXISTS t_mt_async_insert; +DROP TABLE IF EXISTS t_mt_sync_insert; + +CREATE TABLE t_mt_async_insert (id UInt64, s String) +ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 1; + +CREATE TABLE t_mt_sync_insert (id UInt64, s String) +ENGINE = MergeTree ORDER BY id SETTINGS async_insert = 0;" + +url="${CLICKHOUSE_URL}&async_insert=0&wait_for_async_insert=1" + +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_async_insert VALUES (1, 'aa'), (2, 'bb')" +${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO t_mt_sync_insert VALUES (1, 'aa'), (2, 'bb')" + +${CLICKHOUSE_CLIENT} -n --query " +SELECT count() FROM t_mt_async_insert; +SELECT count() FROM t_mt_sync_insert; + +SYSTEM FLUSH LOGS; +SELECT tables[1], ProfileEvents['AsyncInsertQuery'] FROM system.query_log +WHERE + type = 'QueryFinish' AND + current_database = currentDatabase() AND + query ILIKE 'INSERT INTO t_mt_%sync_insert%' +ORDER BY tables[1]; + +DROP TABLE IF EXISTS t_mt_async_insert; +DROP TABLE IF EXISTS t_mt_sync_insert;" diff --git a/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.reference b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql new file mode 100644 index 00000000000..e1db4ba2fa6 --- /dev/null +++ b/tests/queries/0_stateless/02725_keeper_fault_inject_sequential_cleanup.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS keeper_fault_inject_sequential_cleanup; + +CREATE TABLE keeper_fault_inject_sequential_cleanup (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_02725/tables/keeper_fault_inject_sequential_cleanup', '1') ORDER BY d; + +INSERT INTO keeper_fault_inject_sequential_cleanup VALUES (1); +INSERT INTO keeper_fault_inject_sequential_cleanup SETTINGS insert_deduplicate = 0 VALUES (1); +INSERT INTO keeper_fault_inject_sequential_cleanup SETTINGS insert_deduplicate = 0, insert_keeper_fault_injection_probability = 0.4, insert_keeper_fault_injection_seed = 5619964844601345291 VALUES (1); + +-- with database ordinary it produced a warning +DROP TABLE keeper_fault_inject_sequential_cleanup; diff --git a/tests/queries/0_stateless/02725_memory-for-merges.reference b/tests/queries/0_stateless/02725_memory-for-merges.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02725_memory-for-merges.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql new file mode 100644 index 00000000000..b6ae7af7f1a --- /dev/null +++ b/tests/queries/0_stateless/02725_memory-for-merges.sql @@ -0,0 +1,27 @@ +-- Tags: no-s3-storage +-- We allocate a lot of memory for buffers when reading or writing to S3 + +DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; + +CREATE TABLE 02725_memory_for_merges +( n UInt64, + s String +) +ENGINE = MergeTree +ORDER BY n +SETTINGS merge_max_block_size_bytes=1024, index_granularity_bytes=1024; + +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 
02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); +INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100); + +OPTIMIZE TABLE 02725_memory_for_merges FINAL; + +SYSTEM FLUSH LOGS; + +WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid +SELECT sum(peak_memory_usage) < 1024 * 1024 * 200 from system.part_log where table_uuid=uuid and event_type='MergeParts'; + +DROP TABLE IF EXISTS 02725_memory_for_merges SYNC; diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.reference b/tests/queries/0_stateless/02725_start_stop_fetches.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_start_stop_fetches.sh b/tests/queries/0_stateless/02725_start_stop_fetches.sh new file mode 100755 index 00000000000..0ca687ae951 --- /dev/null +++ b/tests/queries/0_stateless/02725_start_stop_fetches.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, no-parallel, no-upgrade-check, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +NUM_REPLICAS=5 + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS r$i SYNC; + CREATE TABLE r$i (x UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/r', 'r$i') ORDER BY x SETTINGS replicated_deduplication_window = 1, allow_remote_fs_zero_copy_replication = 1; + " +done + +function thread { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "INSERT INTO r$REPLICA SELECT rand()" + done +} + +function nemesis_thread1 { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "SYSTEM STOP REPLICATED SENDS r$REPLICA" + sleep 0.5 + $CLICKHOUSE_CLIENT --query "SYSTEM START REPLICATED SENDS r$REPLICA" + done +} + +function nemesis_thread2 { + while true; do + REPLICA=$(($RANDOM % 5 + 1)) + $CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES r$REPLICA" + sleep 0.5 + $CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES r$REPLICA" + done +} + + + +export -f thread +export -f nemesis_thread1 +export -f nemesis_thread2 + +TIMEOUT=20 + +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c thread 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread1 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & +timeout $TIMEOUT bash -c nemesis_thread2 2>/dev/null & + +wait + + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -q "SYSTEM START FETCHES r$REPLICA" + $CLICKHOUSE_CLIENT -q "SYSTEM START REPLICATED SENDS r$REPLICA" +done + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT --max_execution_time 60 -q "SYSTEM SYNC REPLICA r$i PULL" +done + +for i in $(seq 1 $NUM_REPLICAS); do + $CLICKHOUSE_CLIENT -q "DROP TABLE r$i" 2>/dev/null & +done + +wait diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference new file mode 100644 index 00000000000..09d337562b5 --- /dev/null +++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.reference @@ -0,0 +1,2 @@ 
+dict_sharded 1 1000000 0.4768 +dict_sharded_multi 5 1000000 0.4768 diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql new file mode 100644 index 00000000000..1e42f56889d --- /dev/null +++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor_element_count.sql @@ -0,0 +1,17 @@ +DROP DICTIONARY IF EXISTS dict_sharded; +DROP DICTIONARY IF EXISTS dict_sharded_multi; +DROP TABLE IF EXISTS dict_data; + +CREATE TABLE dict_data (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355, number%65535 FROM numbers(1e6); + +CREATE DICTIONARY dict_sharded (key UInt64, v0 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32)); +SYSTEM RELOAD DICTIONARY dict_sharded; +SELECT name, length(attribute.names), element_count, round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded'; +DROP DICTIONARY dict_sharded; + +CREATE DICTIONARY dict_sharded_multi (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32)); +SYSTEM RELOAD DICTIONARY dict_sharded_multi; +SELECT name, length(attribute.names), element_count, round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded_multi'; +DROP DICTIONARY dict_sharded_multi; + +DROP TABLE dict_data; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 0df7691bd64..8c5d877755f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -91,6 +91,7 @@ LOCALTIMESTAMP LibFuzzer LineAsString LinksDeployment +LLVM's LowCardinality MEMTABLE MVCC diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 7dbd7d7a816..afaf2ee6d48 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -13,7 +13,7 @@ # and then to run formatter only for the specified files. 
ROOT_PATH=$(git rev-parse --show-toplevel) -EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/' +EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/|utils/keeper-bench/example.yaml' # From [1]: # But since array_to_string_internal() in array.c still loops over array diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 2596be4addd..87fa64b1761 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -1,2 +1,7 @@ +if (NOT TARGET ch_contrib::rapidjson) + message (${RECONFIGURE_MESSAGE_LEVEL} "Not building keeper-bench due to rapidjson is disabled") + return() +endif() + clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index b6d8223862c..2212f7158ae 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -1,16 +1,18 @@ #include "Generator.h" +#include "Common/Exception.h" +#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include #include #include +#include using namespace Coordination; using namespace zkutil; -namespace DB -{ -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; -} + extern const int BAD_ARGUMENTS; } namespace @@ -38,16 +40,6 @@ std::string generateRandomString(size_t length) } } -std::string generateRandomPath(const std::string & prefix, size_t length) -{ - return std::filesystem::path(prefix) / generateRandomString(length); -} - -std::string generateRandomData(size_t size) -{ - return generateRandomString(size); -} - void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) { namespace fs = std::filesystem; @@ -96,139 +88,629 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa remove_future.get(); } - -void CreateRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +NumberGetter +NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { - removeRecursive(zookeeper, path_prefix); + NumberGetter number_getter; - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) + if (!config.has(key) && default_value.has_value()) { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); + number_getter.value = *default_value; + } + else if (config.has(key + ".min_value") && config.has(key + ".max_value")) + { + NumberRange range{.min_value = config.getUInt64(key + ".min_value"), .max_value = config.getUInt64(key + ".max_value")}; + if (range.max_value <= range.min_value) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Range is invalid for key {}: [{}, {}]", key, range.min_value, range.max_value); + number_getter.value = range; + } + else + { + number_getter.value = config.getUInt64(key); + } + + return number_getter; } -ZooKeeperRequestPtr CreateRequestGenerator::generate() 
+std::string NumberGetter::description() const { + if (const auto * number = std::get_if(&value)) + return std::to_string(*number); + + const auto & range = std::get(value); + return fmt::format("random value from range [{}, {}]", range.min_value, range.max_value); +} + +uint64_t NumberGetter::getNumber() const +{ + if (const auto * number = std::get_if(&value)) + return *number; + + const auto & range = std::get(value); + static pcg64 rng(randomSeed()); + return std::uniform_int_distribution(range.min_value, range.max_value)(rng); +} + +StringGetter StringGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + StringGetter string_getter; + if (config.has(key + ".random_string")) + string_getter.value + = NumberGetter::fromConfig(key + ".random_string.size", config); + else + string_getter.value = config.getString(key); + + return string_getter; +} + +void StringGetter::setString(std::string name) +{ + value = std::move(name); +} + +std::string StringGetter::getString() const +{ + if (const auto * string = std::get_if(&value)) + return *string; + + const auto number_getter = std::get(value); + return generateRandomString(number_getter.getNumber()); +} + +std::string StringGetter::description() const +{ + if (const auto * string = std::get_if(&value)) + return *string; + + const auto number_getter = std::get(value); + return fmt::format("random string with size of {}", number_getter.description()); +} + +bool StringGetter::isRandom() const +{ + return std::holds_alternative(value); +} + +PathGetter PathGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + static constexpr std::string_view path_key_string = "path"; + + PathGetter path_getter; + Poco::Util::AbstractConfiguration::Keys path_keys; + config.keys(key, path_keys); + + for (const auto & path_key : path_keys) + { + if (!path_key.starts_with(path_key_string)) + continue; + + const auto current_path_key_string = key + "." + path_key; + const auto children_of_key = current_path_key_string + ".children_of"; + if (config.has(children_of_key)) + { + auto parent_node = config.getString(children_of_key); + if (parent_node.empty() || parent_node[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", parent_node); + path_getter.parent_paths.push_back(std::move(parent_node)); + } + else + { + auto path = config.getString(key + "." 
+ path_key); + + if (path.empty() || path[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", path); + + path_getter.paths.push_back(std::move(path)); + } + } + + path_getter.path_picker = std::uniform_int_distribution(0, path_getter.paths.size() - 1); + return path_getter; +} + +void PathGetter::initialize(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & parent_path : parent_paths) + { + auto list_promise = std::make_shared>(); + auto list_future = list_promise->get_future(); + auto callback = [list_promise] (const ListResponse & response) + { + if (response.error != Coordination::Error::ZOK) + list_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + list_promise->set_value(response); + }; + zookeeper.list(parent_path, ListRequestType::ALL, std::move(callback), {}); + auto list_response = list_future.get(); + + for (const auto & child : list_response.names) + paths.push_back(std::filesystem::path(parent_path) / child); + } + + path_picker = std::uniform_int_distribution(0, paths.size() - 1); + initialized = true; +} + +std::string PathGetter::getPath() const +{ + if (!initialized) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "PathGetter is not initialized"); + + if (paths.size() == 1) + return paths[0]; + + static pcg64 rng(randomSeed()); + return paths[path_picker(rng)]; +} + +std::string PathGetter::description() const +{ + std::string description; + for (const auto & path : parent_paths) + { + if (!description.empty()) + description += ", "; + description += fmt::format("children of {}", path); + } + + for (const auto & path : paths) + { + if (!description.empty()) + description += ", "; + description += path; + } + + return description; +} + +RequestGetter::RequestGetter(std::vector request_generators_) + : request_generators(std::move(request_generators_)) +{} + +RequestGetter RequestGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi) +{ + RequestGetter request_getter; + + Poco::Util::AbstractConfiguration::Keys generator_keys; + config.keys(key, generator_keys); + + bool use_weights = false; + size_t weight_sum = 0; + auto & generators = request_getter.request_generators; + for (const auto & generator_key : generator_keys) + { + RequestGeneratorPtr request_generator; + + if (generator_key.starts_with("create")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("set")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("get")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("list")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("multi")) + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Nested multi requests are not allowed"); + request_generator = std::make_unique(); + } + else + { + if (for_multi) + continue; + + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown generator {}", key + "." + generator_key); + } + + request_generator->getFromConfig(key + "." + generator_key, config); + + auto weight = request_generator->getWeight(); + use_weights |= weight != 1; + weight_sum += weight; + + generators.push_back(std::move(request_generator)); + } + + if (generators.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No request generators found in config for key '{}'", key); + + + size_t max_value = use_weights ? 
weight_sum - 1 : generators.size() - 1; + request_getter.request_generator_picker = std::uniform_int_distribution(0, max_value); + + /// construct weight vector + if (use_weights) + { + auto & weights = request_getter.weights; + weights.reserve(generators.size()); + weights.push_back(generators[0]->getWeight() - 1); + + for (size_t i = 1; i < generators.size(); ++i) + weights.push_back(weights.back() + generators[i]->getWeight()); + } + + return request_getter; +} + +RequestGeneratorPtr RequestGetter::getRequestGenerator() const +{ + static pcg64 rng(randomSeed()); + + auto random_number = request_generator_picker(rng); + + if (weights.empty()) + return request_generators[random_number]; + + for (size_t i = 0; i < request_generators.size(); ++i) + { + if (random_number <= weights[i]) + return request_generators[i]; + } + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid number generated: {}", random_number); +} + +std::string RequestGetter::description() const +{ + std::string guard(30, '-'); + std::string description = guard; + + for (const auto & request_generator : request_generators) + description += fmt::format("\n{}\n", request_generator->description()); + return description + guard; +} + +void RequestGetter::startup(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & request_generator : request_generators) + request_generator->startup(zookeeper); +} + +const std::vector & RequestGetter::requestGenerators() const +{ + return request_generators; +} + +void RequestGenerator::getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + if (config.has(key + ".weight")) + weight = config.getUInt64(key + ".weight"); + getFromConfigImpl(key, config); +} + +std::string RequestGenerator::description() +{ + std::string weight_string = weight == 1 ? "" : fmt::format("\n- weight: {}", weight); + return fmt::format("{}{}", descriptionImpl(), weight_string); +} + +Coordination::ZooKeeperRequestPtr RequestGenerator::generate(const Coordination::ACLs & acls) +{ + return generateImpl(acls); +} + +void RequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +{ + startupImpl(zookeeper); +} + +size_t RequestGenerator::getWeight() const +{ + return weight; +} + +CreateRequestGenerator::CreateRequestGenerator() + : rng(randomSeed()) + , remove_picker(0, 1.0) +{} + +void CreateRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + parent_path = PathGetter::fromConfig(key, config); + + name = StringGetter(NumberGetter::fromConfig(key + ".name_length", config, 5)); + + if (config.has(key + ".data")) + data = StringGetter::fromConfig(key + ".data", config); + + if (config.has(key + ".remove_factor")) + remove_factor = config.getDouble(key + ".remove_factor"); +} + +std::string CreateRequestGenerator::descriptionImpl() +{ + std::string data_string + = data.has_value() ? fmt::format("data for created nodes: {}", data->description()) : "no data for created nodes"; + std::string remove_factor_string + = remove_factor.has_value() ? 
fmt::format("- remove factor: {}", *remove_factor) : "- without removes"; + return fmt::format( + "Create Request Generator\n" + "- parent path(s) for created nodes: {}\n" + "- name for created nodes: {}\n" + "- {}\n" + "{}", + parent_path.description(), + name.description(), + data_string, + remove_factor_string); +} + +void CreateRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + parent_path.initialize(zookeeper); +} + +Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + if (remove_factor.has_value() && !paths_created.empty() && remove_picker(rng) < *remove_factor) + { + auto request = std::make_shared(); + auto it = paths_created.begin(); + request->path = *it; + paths_created.erase(it); + return request; + } + auto request = std::make_shared(); - request->acls = default_acls; - size_t plength = 5; - if (path_length) - plength = *path_length; - auto path_candidate = generateRandomPath(path_prefix, plength); + request->acls = acls; + + std::string path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); while (paths_created.contains(path_candidate)) - path_candidate = generateRandomPath(path_prefix, plength); + path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); paths_created.insert(path_candidate); - request->path = path_candidate; - if (data_size) - request->data = generateRandomData(*data_size); + request->path = std::move(path_candidate); + + if (data) + request->data = data->getString(); return request; } - -void SetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void SetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) { - removeRecursive(zookeeper, path_prefix); + path = PathGetter::fromConfig(key, config); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); + data = StringGetter::fromConfig(key + ".data", config); } -ZooKeeperRequestPtr SetRequestGenerator::generate() +std::string SetRequestGenerator::descriptionImpl() +{ + return fmt::format( + "Set Request Generator\n" + "- path(s) to set: {}\n" + "- data to set: {}", + path.description(), + data.description()); +} + +Coordination::ZooKeeperRequestPtr SetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) { auto request = std::make_shared(); - request->path = path_prefix; - request->data = generateRandomData(data_size); - + request->path = path.getPath(); + request->data = data.getString(); return request; } -void MixedRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void SetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) { - for (auto & generator : generators) - generator->startup(zookeeper); + path.initialize(zookeeper); } -ZooKeeperRequestPtr MixedRequestGenerator::generate() +void GetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) { - pcg64 rng(randomSeed()); - std::uniform_int_distribution distribution(0, generators.size() - 1); - - return generators[distribution(rng)]->generate(); + path = PathGetter::fromConfig(key, config); } -void 
GetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +std::string GetRequestGenerator::descriptionImpl() { - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); - size_t total_nodes = 1; - if (num_nodes) - total_nodes = *num_nodes; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, 5); - while (std::find(paths_to_get.begin(), paths_to_get.end(), path) != paths_to_get.end()) - path = generateRandomPath(path_prefix, 5); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - std::string data; - if (nodes_data_size) - data = generateRandomString(*nodes_data_size); - - zookeeper.create(path, data, false, false, default_acls, callback); - create_future.get(); - paths_to_get.push_back(path); - } + return fmt::format( + "Get Request Generator\n" + "- path(s) to get: {}", + path.description()); } -Coordination::ZooKeeperRequestPtr GetRequestGenerator::generate() +Coordination::ZooKeeperRequestPtr GetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) { auto request = std::make_shared(); - - size_t path_index = distribution(rng); - request->path = paths_to_get[path_index]; + request->path = path.getPath(); return request; } -void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +void GetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) { + path.initialize(zookeeper); +} + +void ListRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); +} + +std::string ListRequestGenerator::descriptionImpl() +{ + return fmt::format( + "List Request Generator\n" + "- path(s) to get: {}", + path.description()); +} + +Coordination::ZooKeeperRequestPtr ListRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) +{ + auto request = std::make_shared(); + request->path = path.getPath(); + return request; +} + +void ListRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + path.initialize(zookeeper); +} + +void MultiRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + if (config.has(key + ".size")) + size = NumberGetter::fromConfig(key + ".size", config); + + request_getter = RequestGetter::fromConfig(key, config, /*for_multi*/ true); +}; + +std::string MultiRequestGenerator::descriptionImpl() +{ + std::string size_string = size.has_value() ? 
fmt::format("- number of requests: {}\n", size->description()) : ""; + return fmt::format( + "Multi Request Generator\n" + "{}" + "- requests:\n{}", + size_string, + request_getter.description()); +} + +Coordination::ZooKeeperRequestPtr MultiRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + Coordination::Requests ops; + + if (size) + { + auto request_count = size->getNumber(); + + for (size_t i = 0; i < request_count; ++i) + ops.push_back(request_getter.getRequestGenerator()->generate(acls)); + } + else + { + for (const auto & request_generator : request_getter.requestGenerators()) + ops.push_back(request_generator->generate(acls)); + } + + return std::make_shared(ops, acls); +} + +void MultiRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + request_getter.startup(zookeeper); +} + +Generator::Generator(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + static const std::string generator_key = "generator"; + + std::cerr << "---- Parsing setup ---- " << std::endl; + static const std::string setup_key = generator_key + ".setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + auto node_key = setup_key + "." + key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; + + std::cerr << "---- Collecting request generators ----" << std::endl; + static const std::string requests_key = generator_key + ".requests"; + request_getter = RequestGetter::fromConfig(requests_key, config); + std::cerr << request_getter.description() << std::endl; + std::cerr << "---- Done collecting request generators ----\n" << std::endl; +} + +std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." 
+ node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void Generator::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr Generator::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); auto promise = std::make_shared>(); auto future = promise->get_future(); auto create_callback = [promise] (const CreateResponse & response) @@ -238,103 +720,47 @@ void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) else promise->set_value(); }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); + zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); future.get(); - size_t total_nodes = 1; - if (num_nodes) - total_nodes = *num_nodes; - - size_t path_length = 5; - if (paths_length) - path_length = *paths_length; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, path_length); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - zookeeper.create(path, "", false, false, default_acls, callback); - create_future.get(); - } + for (const auto & child : children) + child->createNode(zookeeper, path, acls); } -Coordination::ZooKeeperRequestPtr ListRequestGenerator::generate() +void Generator::startup(Coordination::ZooKeeper & zookeeper) { - auto request = std::make_shared(); - request->path = path_prefix; - return request; + std::cerr << "---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cerr << "---- Created test data ----\n" << std::endl; + + std::cerr << "---- Initializing generators ----" << std::endl; + + request_getter.startup(zookeeper); } -std::unique_ptr getGenerator(const std::string & name) +Coordination::ZooKeeperRequestPtr Generator::generate() { - if (name == "create_no_data") - { - return std::make_unique(); - } - else if (name == "create_small_data") - { - return std::make_unique("/create_generator", 5, 32); - } - else if (name == "create_medium_data") - { - return std::make_unique("/create_generator", 5, 1024); - } - else if (name == "create_big_data") - { - return std::make_unique("/create_generator", 5, 512 * 1024); - } - else if (name == "get_no_data") - { - return std::make_unique("/get_generator", 10, 0); - } - else if (name == "get_small_data") - { - return std::make_unique("/get_generator", 10, 32); - } - else if (name == "get_medium_data") - { - return std::make_unique("/get_generator", 10, 1024); - } - else if (name == "get_big_data") - { - return std::make_unique("/get_generator", 10, 512 * 1024); - } - else if (name == "list_no_nodes") - { - return std::make_unique("/list_generator", 0, 1); - } - else if (name == "list_few_nodes") - { - return std::make_unique("/list_generator", 10, 5); - } - else if (name == "list_medium_nodes") - { - return std::make_unique("/list_generator", 1000, 5); - } - else if (name == "list_a_lot_nodes") - { - return std::make_unique("/list_generator", 100000, 5); - } - else if (name == "set_small_data") - { - return std::make_unique("/set_generator", 5); - } - else if (name == "mixed_small_data") - { - std::vector> generators; - generators.push_back(std::make_unique("/set_generator", 5)); - generators.push_back(std::make_unique("/get_generator", 10, 32)); - return std::make_unique(std::move(generators)); - } - - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); + return request_getter.getRequestGenerator()->generate(default_acls); +} + +void Generator::cleanup(Coordination::ZooKeeper & zookeeper) +{ + 
std::cerr << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + } } diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index e2c546e4bce..5b4c05b2d8b 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -6,135 +6,194 @@ #include #include #include +#include #include - -std::string generateRandomPath(const std::string & prefix, size_t length = 5); - -std::string generateRandomData(size_t size); - -class IGenerator +struct NumberGetter { -public: - IGenerator() + static NumberGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value = std::nullopt); + uint64_t getNumber() const; + std::string description() const; +private: + struct NumberRange { - Coordination::ACL acl; - acl.permissions = Coordination::ACL::All; - acl.scheme = "world"; - acl.id = "anyone"; - default_acls.emplace_back(std::move(acl)); - } - virtual void startup(Coordination::ZooKeeper & /*zookeeper*/) {} - virtual Coordination::ZooKeeperRequestPtr generate() = 0; - - virtual ~IGenerator() = default; - - Coordination::ACLs default_acls; + uint64_t min_value; + uint64_t max_value; + }; + std::variant value; }; -class CreateRequestGenerator final : public IGenerator +struct StringGetter { -public: - explicit CreateRequestGenerator( - std::string path_prefix_ = "/create_generator", - std::optional path_length_ = std::nullopt, - std::optional data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , path_length(path_length_) - , data_size(data_size_) + explicit StringGetter(NumberGetter number_getter) + : value(std::move(number_getter)) {} - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; + StringGetter() = default; + static StringGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + void setString(std::string name); + std::string getString() const; + std::string description() const; + bool isRandom() const; private: - std::string path_prefix; - std::optional path_length; - std::optional data_size; + std::variant value; +}; + +struct PathGetter +{ + static PathGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::string getPath() const; + std::string description() const; + + void initialize(Coordination::ZooKeeper & zookeeper); +private: + std::vector parent_paths; + + bool initialized = false; + + std::vector paths; + mutable std::uniform_int_distribution path_picker; +}; + +struct RequestGenerator +{ + virtual ~RequestGenerator() = default; + + void getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + Coordination::ZooKeeperRequestPtr generate(const Coordination::ACLs & acls); + + std::string description(); + + void startup(Coordination::ZooKeeper & zookeeper); + + size_t getWeight() const; +private: + virtual void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) = 0; + virtual std::string descriptionImpl() = 0; + virtual Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) = 0; + virtual void startupImpl(Coordination::ZooKeeper &) {} + + size_t weight = 1; +}; + +using RequestGeneratorPtr = 
std::shared_ptr; + +struct CreateRequestGenerator final : public RequestGenerator +{ + CreateRequestGenerator(); +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter parent_path; + StringGetter name; + std::optional data; + + std::optional remove_factor; + pcg64 rng; + std::uniform_real_distribution remove_picker; + std::unordered_set paths_created; }; - -class GetRequestGenerator final : public IGenerator +struct SetRequestGenerator final : public RequestGenerator { -public: - explicit GetRequestGenerator( - std::string path_prefix_ = "/get_generator", - std::optional num_nodes_ = std::nullopt, - std::optional nodes_data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , nodes_data_size(nodes_data_size_) - , rng(randomSeed()) - , distribution(0, num_nodes ? *num_nodes - 1 : 0) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - std::optional num_nodes; - std::optional nodes_data_size; - std::vector paths_to_get; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; - pcg64 rng; - std::uniform_int_distribution distribution; + PathGetter path; + StringGetter data; }; -class ListRequestGenerator final : public IGenerator +struct GetRequestGenerator final : public RequestGenerator { -public: - explicit ListRequestGenerator( - std::string path_prefix_ = "/list_generator", - std::optional num_nodes_ = std::nullopt, - std::optional paths_length_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , paths_length(paths_length_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - std::optional num_nodes; - std::optional paths_length; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; }; -class SetRequestGenerator final : public IGenerator +struct ListRequestGenerator final : public RequestGenerator { -public: - explicit SetRequestGenerator( - std::string path_prefix_ = "/set_generator", - uint64_t data_size_ = 5) - : path_prefix(path_prefix_) - , data_size(data_size_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - private: - std::string path_prefix; - uint64_t data_size; + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; }; -class MixedRequestGenerator final : public 
IGenerator +struct RequestGetter { -public: - explicit MixedRequestGenerator(std::vector> generators_) - : generators(std::move(generators_)) - {} + explicit RequestGetter(std::vector request_generators_); - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; + RequestGetter() = default; + static RequestGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi = false); + + RequestGeneratorPtr getRequestGenerator() const; + std::string description() const; + void startup(Coordination::ZooKeeper & zookeeper); + const std::vector & requestGenerators() const; private: - std::vector> generators; + std::vector request_generators; + std::vector weights; + mutable std::uniform_int_distribution request_generator_picker; }; +struct MultiRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; -std::unique_ptr getGenerator(const std::string & name); + std::optional size; + RequestGetter request_getter; +}; + +class Generator +{ +public: + explicit Generator(const Poco::Util::AbstractConfiguration & config); + + void startup(Coordination::ZooKeeper & zookeeper); + Coordination::ZooKeeperRequestPtr generate(); + void cleanup(Coordination::ZooKeeper & zookeeper); +private: + struct Node + { + StringGetter name; + std::optional data; + std::vector> children; + size_t repeat_count = 0; + + std::shared_ptr clone() const; + + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::uniform_int_distribution request_picker; + std::vector> root_nodes; + RequestGetter request_getter; + Coordination::ACLs default_acls; +}; + +std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/README.md b/utils/keeper-bench/README.md new file mode 100644 index 00000000000..8b498228799 --- /dev/null +++ b/utils/keeper-bench/README.md @@ -0,0 +1,317 @@ +# Keeper Bench + +Keeper Bench is a tool for benchmarking Keeper or any ZooKeeper compatible systems. + +To run it call following command from the build folder: + +``` +./utils/keeper-bench --config benchmark_config_file.yaml +``` + +## Configuration file + +Keeper Bench runs need to be configured inside a yaml or XML file. +An example of a configuration file can be found in `./utils/keeper-bench/example.yaml` + +### Table of contents +- [Special Types](#special-types) +- [General settings](#general-settings) +- [Connections](#connections) +- [Generator](#generator) +- [Output](#output) + + +## Special types + +### IntegerGetter + +Can be defined with constant integer or as a random value from a range. + +```yaml +key: integer +key: + min_value: integer + max_value: integer +``` + +Example for a constant value: + +```yaml +some_key: 2 +``` + +Example for random value from [10, 20]: + +```yaml +some_key: + min_value: 10 + max_value: 20 +``` + +### StringGetter + +Can be defined with constant string or as a random string of some size. 
+ +```yaml +key: string +key: + random_string: + size: IntegerGetter +``` + +Example for a constant value: +```yaml +some_key: "string" +``` + +Example for a random string with a random size from [10, 20]: +```yaml +some_key: + random_string: + size: + min_value: 10 + max_value: 20 +``` + + +### PathGetter + +If a section contains one or more `path` keys, all `path` keys are collected into a list. \ +Additionally, paths can be defined with key `children_of` which will add all children of some path to the list. + +```yaml +path: string +path: + children_of: string +``` + +Example for defining list of paths (`/path1`, `/path2` and children of `/path3`): + +```yaml +main: + path: + - "/path1" + - "/path2" + path: + children_of: "/path3" +``` + + +## General settings + +```yaml +# number of parallel queries (default: 1) +concurrency: integer + +# amount of queries to be executed, set 0 to disable limit (default: 0) +iterations: integer + +# delay between intermediate reports in seconds, set 0 to disable reports (default: 1.0) +report_delay: double + +# stop launch of queries after specified time limit, set 0 to disable limit (default: 0) +timelimit: double + +# continue testing even if a query fails (default: false) +continue_on_errors: boolean +``` + + +## Connections + +Connection definitions that will be used throughout tests defined under `connections` key. + +Following configurations can be defined under `connections` key or for each specific connection. \ +If it's defined under `connections` key, it will be used by default unless a specific connection overrides it. + +```yaml +secure: boolean +operation_timeout_ms: integer +session_timeout_ms: integer +connection_timeout_ms: integer +``` + +Specific configuration can be defined with a string or with a detailed description. + +```yaml +host: string +connection: + host: string + + # number of sessions to create for host + sessions: integer + # any connection configuration defined above +``` + +Example definition of 3 connections in total, 1 to `localhost:9181` and 2 to `localhost:9182` both will use secure connections: + +```yaml +connections: + secure: true + + host: "localhost:9181" + connection: + host: "localhost:9182" + sessions: 2 +``` + + +## Generator + +Main part of the benchmark is the generator itself which creates necessary nodes and defines how the requests will be generated. \ +It is defined under `generator` key. + +### Setup + +Setup defines nodes that are needed for test, defined under `setup` key. + +Each node is defined with a `node` key in the following format: + +```yaml +node: StringGetter + +node: + name: StringGetter + data: StringGetter + repeat: integer + node: Node +``` + +If only string is defined, a node with that name will be created. \ +Otherwise more detailed definition could be included to set data or the children of the node. \ +If `repeat` key is set, the node definition will be used multiple times. For a `repeat` key to be valid, the name of the node needs to be a random string. + +Example for a setup: + +```yaml +generator: + setup: + node: "node1" + node: + name: + random_string: + size: 20 + data: "somedata" + repeat: 4 + node: + name: + random_string: + size: 10 + repeat: 2 +``` + +We will create node `/node1` with no data and 4 children of random name of size 20 and data set to `somedata`. \ +We will also create 2 nodes with no data and random name of size 10 under `/` node. + +### Requests + +While benchmark is running, we are generating requests. 
+ +Request generator is defined under `requests` key. \ +For each request `weight` (default: 1) can be defined which defines preference for a certain request. + +#### `create` + +```yaml +create: + # parent path for created nodes + path: string + + # length of the name for the create node (default: 5) + name_length: IntegerGetter + + # data for create nodes (default: "") + data: StringGetter + + # value in range [0.0, 1.0> denoting how often a remove request should be generated compared to create request (default: 0) + remove_factor: double +``` + +#### `set` + +```yaml +set: + # paths on which we randomly set data + path: PathGetter + + # data to set + data: StringGetter +``` + +#### `get` + +```yaml +get: + # paths for which we randomly get data + path: PathGetter +``` + +#### `list` + +```yaml +list: + # paths for which we randomly do list request + path: PathGetter +``` + +#### `multi` + +```yaml +multi: + # any request definition defined above can be added + + # optional size for the multi request + size: IntegerGetter +``` + +Multi request definition can contain any other request generator definitions described above. \ +If `size` key is defined, we will randomly pick `size` amount of requests from defined request generators. \ +All request generators can have a higher pick probability by using `weight` key. \ +If `size` is not defined, multi request with same request generators will always be generated. \ +Both write and read multi requests are supported. + +#### Example + +```yaml +generator: + requests: + create: + path: "/test_create" + name_length: + min_value: 10 + max_value: 20 + multi: + weight: 20 + size: 10 + get: + path: + children_of: "/test_get1" + get: + weight: 2 + path: + children_of: "/test_get2" +``` + +We defined a request geneator that will generate either a `create` or a `multi` request. \ +Each `create` request will create a node under `/test_create` with a randomly generated name with size from range `[10, 20]`. \ +`multi` request will be generated 20 times more than `create` request. \ +`multi` request will contain 10 requests and approximately twice as much get requests to children of "/test_get2". 
+ + +## Output + +```yaml +output: + # if defined, JSON output of results will be stored at the defined path + file: string + # or + file: + # if defined, JSON output of results will be stored at the defined path + path: string + + # if set to true, timestamp will be appended to the output file name (default: false) + with_timestamp: boolean + + # if set to true, output will be printed to stdout also (default: false) + stdout: boolean +``` diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index c858b476483..f86d2b44dd7 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,15 +1,160 @@ #include "Runner.h" +#include -namespace DB +#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include +#include +#include "IO/ReadBufferFromString.h" +#include +#include +#include + +namespace CurrentMetrics { + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; + extern const int BAD_ARGUMENTS; } +Runner::Runner( + std::optional concurrency_, + const std::string & config_path, + const Strings & hosts_strings_, + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_) + : info(std::make_shared()) +{ + + DB::ConfigProcessor config_processor(config_path, true, false); + auto config = config_processor.loadConfig().configuration; + + generator.emplace(*config); + + if (!hosts_strings_.empty()) + { + for (const auto & host : hosts_strings_) + connection_infos.push_back({.host = host}); + } + else + { + if (!config) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or hosts defined"); + + parseHostsFromConfig(*config); + } + + std::cerr << "---- Run options ---- " << std::endl; + static constexpr uint64_t DEFAULT_CONCURRENCY = 1; + if (concurrency_) + concurrency = *concurrency_; + else + concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); + std::cerr << "Concurrency: " << concurrency << std::endl; + + static constexpr uint64_t DEFAULT_ITERATIONS = 0; + if (max_iterations_) + max_iterations = *max_iterations_; + else + max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); + std::cerr << "Iterations: " << max_iterations << std::endl; + + static constexpr double DEFAULT_DELAY = 1.0; + if (delay_) + delay = *delay_; + else + delay = config->getDouble("report_delay", DEFAULT_DELAY); + std::cerr << "Report delay: " << delay << std::endl; + + static constexpr double DEFAULT_TIME_LIMIT = 0.0; + if (max_time_) + max_time = *max_time_; + else + max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); + std::cerr << "Time limit: " << max_time << std::endl; + + if (continue_on_error_) + continue_on_error = *continue_on_error_; + else + continue_on_error = config->getBool("continue_on_error", false); + std::cerr << "Continue on error: " << continue_on_error << std::endl; + + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = 
config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } + + std::cerr << "---- Run options ----\n" << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency); + queue.emplace(concurrency); } +void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + ConnectionInfo default_connection_info; + + const auto fill_connection_details = [&](const std::string & key, auto & connection_info) + { + if (config.has(key + ".secure")) + connection_info.secure = config.getBool(key + ".secure"); + + if (config.has(key + ".session_timeout_ms")) + connection_info.session_timeout_ms = config.getInt(key + ".session_timeout_ms"); + + if (config.has(key + ".operation_timeout_ms")) + connection_info.operation_timeout_ms = config.getInt(key + ".operation_timeout_ms"); + + if (config.has(key + ".connection_timeout_ms")) + connection_info.connection_timeout_ms = config.getInt(key + ".connection_timeout_ms"); + }; + + fill_connection_details("connections", default_connection_info); + + Poco::Util::AbstractConfiguration::Keys connections_keys; + config.keys("connections", connections_keys); + + for (const auto & key : connections_keys) + { + std::string connection_key = "connections." + key; + auto connection_info = default_connection_info; + if (key.starts_with("host")) + { + connection_info.host = config.getString(connection_key); + connection_infos.push_back(std::move(connection_info)); + } + else if (key.starts_with("connection") && key != "connection_timeout_ms") + { + connection_info.host = config.getString(connection_key + ".host"); + if (config.has(connection_key + ".sessions")) + connection_info.sessions = config.getUInt64(connection_key + ".sessions"); + + fill_connection_details(connection_key, connection_info); + + connection_infos.push_back(std::move(connection_info)); + } + } +} void Runner::thread(std::vector> zookeepers) { @@ -33,7 +178,7 @@ void Runner::thread(std::vector> zookee while (!extracted) { - extracted = queue.tryPop(request, 100); + extracted = queue->tryPop(request, 100); if (shutdown || (max_iterations && requests_executed >= max_iterations)) @@ -47,9 +192,35 @@ void Runner::thread(std::vector> zookee auto promise = std::make_shared>(); auto future = promise->get_future(); - Coordination::ResponseCallback callback = [promise](const Coordination::Response & response) + Coordination::ResponseCallback callback = [&request, promise](const Coordination::Response & response) { - if (response.error != Coordination::Error::ZOK) + bool set_exception = true; + + if (response.error == Coordination::Error::ZOK) + { + set_exception = false; + } + else if (response.error == Coordination::Error::ZNONODE) + { + /// remove can fail with ZNONODE because of different order of execution + /// of generated create and remove requests + /// this is okay for concurrent runs + if (dynamic_cast(&response)) + set_exception = false; + else if (const auto * multi_response = dynamic_cast(&response)) + { + const auto & responses = multi_response->responses; + size_t i = 0; + while (responses[i]->error != Coordination::Error::ZNONODE) + ++i; + + const auto & multi_request = dynamic_cast(*request); + if (dynamic_cast(&*multi_request.requests[i])) + set_exception = false; + } + } + + if (set_exception) promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); else promise->set_value(response.bytesSize()); @@ -62,14 +233,14 @@ void 
Runner::thread(std::vector> zookee try { auto response_size = future.get(); - double seconds = watch.elapsedSeconds(); + auto microseconds = watch.elapsedMicroseconds(); std::lock_guard lock(mutex); if (request->isReadRequest()) - info->addRead(seconds, 1, request->bytesSize() + response_size); + info->addRead(microseconds, 1, request->bytesSize() + response_size); else - info->addWrite(seconds, 1, request->bytesSize() + response_size); + info->addWrite(microseconds, 1, request->bytesSize() + response_size); } catch (...) { @@ -95,7 +266,7 @@ void Runner::thread(std::vector> zookee { try { - zookeepers = getConnections(); + zookeepers = refreshConnections(); break; } catch (...) @@ -110,13 +281,13 @@ void Runner::thread(std::vector> zookee } } -bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener) +bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener) { bool inserted = false; while (!inserted) { - inserted = queue.tryPush(request, 100); + inserted = queue->tryPush(std::move(request), 100); if (shutdown) { @@ -126,13 +297,13 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr if (max_time > 0 && total_watch.elapsedSeconds() >= max_time) { - std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n"; + std::cerr << "Stopping launch of queries. Requested time limit is exhausted.\n"; return false; } if (interrupt_listener.check()) { - std::cout << "Stopping launch of queries. SIGINT received." << std::endl; + std::cerr << "Stopping launch of queries. SIGINT received." << std::endl; return false; } @@ -141,7 +312,7 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); + info->report(concurrency); delay_watch.restart(); } } @@ -152,23 +323,26 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr void Runner::runBenchmark() { - auto aux_connections = getConnections(); + createConnections(); std::cerr << "Preparing to run\n"; - generator->startup(*aux_connections[0]); + generator->startup(*connections[0]); std::cerr << "Prepared\n"; + + auto start_timestamp_ms = Poco::Timestamp().epochMicroseconds() / 1000; + try { - auto connections = getConnections(); for (size_t i = 0; i < concurrency; ++i) { - pool.scheduleOrThrowOnError([this, connections]() mutable { thread(connections); }); + auto thread_connections = connections; + pool->scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); }); } } catch (...) 
{ shutdown = true; - pool.wait(); + pool->wait(); throw; } @@ -185,31 +359,102 @@ void Runner::runBenchmark() } } - pool.wait(); + pool->wait(); total_watch.stop(); printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); -} + info->report(concurrency); + DB::WriteBufferFromOwnString out; + info->writeJSON(out, concurrency, start_timestamp_ms); + auto output_string = std::move(out.str()); -std::vector> Runner::getConnections() -{ - std::vector> zookeepers; - for (const auto & host_string : hosts_strings) + if (print_to_stdout) + std::cout << output_string << std::endl; + + if (file_output) { - Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{host_string}, false}; - std::vector nodes; - nodes.push_back(node); - zkutil::ZooKeeperArgs args; - args.session_timeout_ms = 30000; - args.connection_timeout_ms = 1000; - args.operation_timeout_ms = 10000; - zookeepers.emplace_back(std::make_shared(nodes, args, nullptr)); + auto path = *file_output; + + if (output_file_with_timestamp) + { + auto filename = file_output->filename(); + filename = fmt::format("{}_{}{}", filename.stem().generic_string(), start_timestamp_ms, filename.extension().generic_string()); + path = file_output->parent_path() / filename; + } + + std::cerr << "Storing output to " << path << std::endl; + + DB::WriteBufferFromFile file_output_buffer(path); + DB::ReadBufferFromString read_buffer(output_string); + DB::copyData(read_buffer, file_output_buffer); } - - - return zookeepers; } + + +void Runner::createConnections() +{ + DB::EventNotifier::init(); + std::cerr << "---- Creating connections ---- " << std::endl; + for (size_t connection_info_idx = 0; connection_info_idx < connection_infos.size(); ++connection_info_idx) + { + const auto & connection_info = connection_infos[connection_info_idx]; + std::cerr << fmt::format("Creating {} session(s) for:\n" + "- host: {}\n" + "- secure: {}\n" + "- session timeout: {}ms\n" + "- operation timeout: {}ms\n" + "- connection timeout: {}ms", + connection_info.sessions, + connection_info.host, + connection_info.secure, + connection_info.session_timeout_ms, + connection_info.operation_timeout_ms, + connection_info.connection_timeout_ms) << std::endl; + + for (size_t session = 0; session < connection_info.sessions; ++session) + { + connections.emplace_back(getConnection(connection_info)); + connections_to_info_map[connections.size() - 1] = connection_info_idx; + } + } + std::cerr << "---- Done creating connections ----\n" << std::endl; +} + +std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info) +{ + Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{connection_info.host}, connection_info.secure}; + std::vector nodes; + nodes.push_back(node); + zkutil::ZooKeeperArgs args; + args.session_timeout_ms = connection_info.session_timeout_ms; + args.connection_timeout_ms = connection_info.operation_timeout_ms; + args.operation_timeout_ms = connection_info.connection_timeout_ms; + return std::make_shared(nodes, args, nullptr); +} + +std::vector> Runner::refreshConnections() +{ + std::lock_guard lock(connection_mutex); + for (size_t connection_idx = 0; connection_idx < connections.size(); ++connection_idx) + { + auto & connection = connections[connection_idx]; + if (connection->isExpired()) + { + const auto & connection_info = connection_infos[connections_to_info_map[connection_idx]]; + connection = getConnection(connection_info); + } + } + return connections; +} + +Runner::~Runner() +{ + queue->clearAndFinish(); + 
shutdown = true; + pool->wait(); + generator->cleanup(*connections[0]); +} + diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index a00b7b43eff..f899f1d538d 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -1,50 +1,35 @@ #pragma once +#include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include "Generator.h" #include +#include #include #include #include -#include -#include #include #include #include +#include #include "Stats.h" +#include + using Ports = std::vector; using Strings = std::vector; -namespace CurrentMetrics -{ - extern const Metric LocalThread; - extern const Metric LocalThreadActive; -} - class Runner { public: Runner( - size_t concurrency_, - const std::string & generator_name, + std::optional concurrency_, + const std::string & config_path, const Strings & hosts_strings_, - double max_time_, - double delay_, - bool continue_on_error_, - size_t max_iterations_) - : concurrency(concurrency_) - , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) - , hosts_strings(hosts_strings_) - , generator(getGenerator(generator_name)) - , max_time(max_time_) - , delay(delay_) - , continue_on_error(continue_on_error_) - , max_iterations(max_iterations_) - , info(std::make_shared()) - , queue(concurrency) - { - } + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_); void thread(std::vector> zookeepers); @@ -53,18 +38,19 @@ public: std::cerr << "Requests executed: " << num << ".\n"; } - bool tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener); + bool tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener); void runBenchmark(); - + ~Runner(); private: + void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); size_t concurrency = 1; - ThreadPool pool; - Strings hosts_strings; - std::unique_ptr generator; + std::optional pool; + + std::optional generator; double max_time = 0; double delay = 1; bool continue_on_error = false; @@ -73,6 +59,9 @@ private: std::atomic shutdown = false; std::shared_ptr info; + bool print_to_stdout; + std::optional file_output; + bool output_file_with_timestamp; Stopwatch total_watch; Stopwatch delay_watch; @@ -80,7 +69,26 @@ private: std::mutex mutex; using Queue = ConcurrentBoundedQueue; - Queue queue; + std::optional queue; - std::vector> getConnections(); + struct ConnectionInfo + { + std::string host; + + bool secure = false; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + + size_t sessions = 1; + }; + + std::mutex connection_mutex; + std::vector connection_infos; + std::vector> connections; + std::unordered_map connections_to_info_map; + + void createConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info); + std::vector> refreshConnections(); }; diff --git a/utils/keeper-bench/Stats.cpp b/utils/keeper-bench/Stats.cpp index 1f8b02ed09d..f5e5f84ba14 100644 --- a/utils/keeper-bench/Stats.cpp +++ b/utils/keeper-bench/Stats.cpp @@ -1,67 +1,177 @@ #include "Stats.h" #include -void report(std::shared_ptr & info, size_t concurrency) +#include +#include +#include +#include + +void Stats::StatsCollector::add(uint64_t microseconds, size_t requests_inc, size_t 
bytes_inc) +{ + work_time += microseconds; + requests += requests_inc; + requests_bytes += bytes_inc; + sampler.insert(microseconds); +} + +void Stats::addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + read_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + write_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::StatsCollector::clear() +{ + requests = 0; + work_time = 0; + requests_bytes = 0; + sampler.clear(); +} + +void Stats::clear() +{ + read_collector.clear(); + write_collector.clear(); +} + +std::pair Stats::StatsCollector::getThroughput(size_t concurrency) +{ + assert(requests != 0); + double seconds = work_time / 1'000'000.0 / concurrency; + + return {requests / seconds, requests_bytes / seconds}; +} + +double Stats::StatsCollector::getPercentile(double percent) +{ + return sampler.quantileNearest(percent / 100.0) / 1000.0; +} + +void Stats::report(size_t concurrency) { std::cerr << "\n"; + const auto & read_requests = read_collector.requests; + const auto & write_requests = write_collector.requests; + /// Avoid zeros, nans or exceptions - if (0 == info->read_requests && 0 == info->write_requests) + if (0 == read_requests && 0 == write_requests) return; - double read_seconds = info->read_work_time / concurrency; - double write_seconds = info->write_work_time / concurrency; + auto [read_rps, read_bps] = read_collector.getThroughput(concurrency); + auto [write_rps, write_bps] = write_collector.getThroughput(concurrency); - std::cerr << "read requests " << info->read_requests << ", write requests " << info->write_requests << ", "; - if (info->errors) - { - std::cerr << "errors " << info->errors << ", "; - } - if (0 != info->read_requests) + std::cerr << "read requests " << read_requests << ", write requests " << write_requests << ", "; + if (errors) + std::cerr << "errors " << errors << ", "; + + if (0 != read_requests) { std::cerr - << "Read RPS: " << (info->read_requests / read_seconds) << ", " - << "Read MiB/s: " << (info->requests_read_bytes / read_seconds / 1048576); - if (0 != info->write_requests) + << "Read RPS: " << read_rps << ", " + << "Read MiB/s: " << read_bps / 1048576; + + if (0 != write_requests) std::cerr << ", "; } - if (0 != info->write_requests) + + if (0 != write_requests) { std::cerr - << "Write RPS: " << (info->write_requests / write_seconds) << ", " - << "Write MiB/s: " << (info->requests_write_bytes / write_seconds / 1048576) << ". " + << "Write RPS: " << write_rps << ", " + << "Write MiB/s: " << write_bps / 1048576 << ". 
" << "\n"; } std::cerr << "\n"; - auto print_percentile = [&](double percent, Stats::Sampler & sampler) + auto print_percentile = [&](double percent, Stats::StatsCollector & collector) { std::cerr << percent << "%\t\t"; - std::cerr << sampler.quantileNearest(percent / 100.0) << " sec.\t"; + std::cerr << collector.getPercentile(percent) << " msec.\t"; std::cerr << "\n"; }; - if (0 != info->read_requests) + const auto print_all_percentiles = [&](auto & collector) + { + for (int percent = 0; percent <= 90; percent += 10) + print_percentile(percent, collector); + + print_percentile(95, collector); + print_percentile(99, collector); + print_percentile(99.9, collector); + print_percentile(99.99, collector); + }; + + if (0 != read_requests) { std::cerr << "Read sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->read_sampler); - - print_percentile(95, info->read_sampler); - print_percentile(99, info->read_sampler); - print_percentile(99.9, info->read_sampler); - print_percentile(99.99, info->read_sampler); + print_all_percentiles(read_collector); } - if (0 != info->write_requests) + if (0 != write_requests) { std::cerr << "Write sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->write_sampler); - - print_percentile(95, info->write_sampler); - print_percentile(99, info->write_sampler); - print_percentile(99.9, info->write_sampler); - print_percentile(99.99, info->write_sampler); + print_all_percentiles(write_collector); } } + +void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp) +{ + using namespace rapidjson; + Document results; + auto & allocator = results.GetAllocator(); + results.SetObject(); + + results.AddMember("timestamp", Value(start_timestamp), allocator); + + const auto get_results = [&](auto & collector) + { + Value specific_results(kObjectType); + + specific_results.AddMember("total_requests", Value(collector.requests), allocator); + + auto [rps, bps] = collector.getThroughput(concurrency); + specific_results.AddMember("requests_per_second", Value(rps), allocator); + specific_results.AddMember("bytes_per_second", Value(bps), allocator); + + Value percentiles(kArrayType); + + const auto add_percentile = [&](double percent) + { + Value percentile(kObjectType); + Value percent_key(fmt::format("{:.2f}", percent).c_str(), allocator); + percentile.AddMember(percent_key, Value(collector.getPercentile(percent)), allocator); + percentiles.PushBack(percentile, allocator); + }; + + for (int percent = 0; percent <= 90; percent += 10) + add_percentile(percent); + + add_percentile(95); + add_percentile(99); + add_percentile(99.9); + add_percentile(99.99); + + specific_results.AddMember("percentiles", percentiles, allocator); + + return specific_results; + }; + + if (read_collector.requests != 0) + results.AddMember("read_results", get_results(read_collector), results.GetAllocator()); + + if (write_collector.requests != 0) + results.AddMember("write_results", get_results(write_collector), results.GetAllocator()); + + StringBuffer strbuf; + strbuf.Clear(); + Writer writer(strbuf); + results.Accept(writer); + + const char * output_string = strbuf.GetString(); + out.write(output_string, strlen(output_string)); +} diff --git a/utils/keeper-bench/Stats.h b/utils/keeper-bench/Stats.h index 1b9a31bb734..bc50588e837 100644 --- a/utils/keeper-bench/Stats.h +++ b/utils/keeper-bench/Stats.h @@ -5,48 +5,38 @@ #include +#include + struct Stats { - std::atomic read_requests{0}; - 
std::atomic write_requests{0}; size_t errors = 0; - size_t requests_write_bytes = 0; - size_t requests_read_bytes = 0; - double read_work_time = 0; - double write_work_time = 0; using Sampler = ReservoirSampler; - Sampler read_sampler {1 << 16}; - Sampler write_sampler {1 << 16}; - - void addRead(double seconds, size_t requests_inc, size_t bytes_inc) + struct StatsCollector { - read_work_time += seconds; - read_requests += requests_inc; - requests_read_bytes += bytes_inc; - read_sampler.insert(seconds); - } + std::atomic requests{0}; + uint64_t requests_bytes = 0; + uint64_t work_time = 0; + Sampler sampler; - void addWrite(double seconds, size_t requests_inc, size_t bytes_inc) - { - write_work_time += seconds; - write_requests += requests_inc; - requests_write_bytes += bytes_inc; - write_sampler.insert(seconds); - } + /// requests/second, bytes/second + std::pair getThroughput(size_t concurrency); + double getPercentile(double percent); - void clear() - { - read_requests = 0; - write_requests = 0; - read_work_time = 0; - write_work_time = 0; - requests_read_bytes = 0; - requests_write_bytes = 0; - read_sampler.clear(); - write_sampler.clear(); - } + void add(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void clear(); + }; + + StatsCollector read_collector; + StatsCollector write_collector; + + void addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + + void clear(); + + void report(size_t concurrency); + void writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp); }; -void report(std::shared_ptr & info, size_t concurrency); diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml new file mode 100644 index 00000000000..e800e923482 --- /dev/null +++ b/utils/keeper-bench/example.yaml @@ -0,0 +1,117 @@ +concurrency: 20 +iterations: 10000 +delay: 4 +timelimit: 300 +continue_on_errors: true + +connections: + operation_timeout_ms: 3000 + connection_timeout_ms: 40000 + + connection: + secure: false + operation_timeout_ms: 2000 + session_timeout_ms: 2000 + connection_timeout_ms: 50000 + host: "localhost:9181" + sessions: 1 + + host: "localhost:9181" + +generator: + setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" + node: + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + repeat: 2 + node: + repeat: 2 + name: + random_string: + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + requests: + create: + path: "/test_create" + name_length: 10 + remove_factor: 0.5 + multi: + size: 20 + create: + path: "/test" + data: + random_string: + size: + min_value: 10 + max_value: 20 + remove_factor: 0.8 + set: + weight: 2 + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + data: + random_string: + size: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + + multi: + weight: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + list: + path: + - "/test3" + path: + children_of: "/test" + + list: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + +output: + file: + path: "output.json" + with_timestamp: true + stdout: true diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 39af28e7580..0753d66850f 100644 
--- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -3,10 +3,24 @@ #include "Runner.h" #include "Stats.h" #include "Generator.h" +#include "Common/Exception.h" #include #include +#include -using namespace std; +namespace +{ + +template +std::optional valueToOptional(const boost::program_options::variable_value & value) +{ + if (value.empty()) + return std::nullopt; + + return value.as(); +} + +} int main(int argc, char *argv[]) { @@ -19,15 +33,14 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() - ("help", "produce help message") - ("generator", value()->default_value("set_small_data"), "query to execute") - ("concurrency,c", value()->default_value(1), "number of parallel queries") - ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value()->default_value(0), "amount of queries to be executed") - ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken(), "") + ("help", "produce help message") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") - ("reconnect", "establish new connection for every query") ; boost::program_options::variables_map options; @@ -41,15 +54,22 @@ int main(int argc, char *argv[]) return 1; } - Runner runner(options["concurrency"].as(), - options["generator"].as(), - options["hosts"].as(), - options["timelimit"].as(), - options["delay"].as(), - options.count("continue_on_errors"), - options["iterations"].as()); + Runner runner(valueToOptional(options["concurrency"]), + options["config"].as(), + options["hosts"].as(), + valueToOptional(options["time-limit"]), + valueToOptional(options["report-delay"]), + options.count("continue_on_errors") ? 
std::optional(true) : std::nullopt, + valueToOptional(options["iterations"])); - runner.runBenchmark(); + try + { + runner.runBenchmark(); + } + catch (const DB::Exception & e) + { + std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl; + } return 0; } diff --git a/utils/list-licenses/list-licenses.sh b/utils/list-licenses/list-licenses.sh index db3eb5e59e8..dd23e6321c8 100755 --- a/utils/list-licenses/list-licenses.sh +++ b/utils/list-licenses/list-licenses.sh @@ -40,14 +40,21 @@ ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while ${GREP_CMD} -q -i -F 'Altered source versions must be plainly marked as such' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'This notice may not be removed or altered' "$LIB_LICENSE" && echo "zLib") || + (${GREP_CMD} -q -i -F 'This program, "bzip2", the associated library "libbzip2"' "$LIB_LICENSE" && + echo "bzip2") || (${GREP_CMD} -q -i -F 'Permission is hereby granted, free of charge, to any person' "$LIB_LICENSE" && - ${GREP_CMD} -q -i -F 'The above copyright notice and this permission notice shall be included' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'The above copyright notice and this permission notice shall be' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND' "$LIB_LICENSE" && echo "MIT") || + (${GREP_CMD} -q -F 'PostgreSQL' "$LIB_LICENSE" && + echo "PostgreSQL") || (${GREP_CMD} -q -i -F 'Permission to use, copy, modify, and distribute this software for any purpose' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'the name of a copyright holder shall not' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND' "$LIB_LICENSE" && echo "MIT/curl") || + (${GREP_CMD} -q -i -F 'OpenLDAP Public License' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'Version 2.8' "$LIB_LICENSE" && + echo "OpenLDAP Version 2.8") || (${GREP_CMD} -q -i -F 'Redistributions of source code must retain the above copyright' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Redistributions in binary form must reproduce' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Neither the name' "$LIB_LICENSE" && @@ -55,6 +62,14 @@ ls -1 -d ${LIBS_PATH}/*/ | ${GREP_CMD} -F -v -- '-cmake' | LC_ALL=C sort | while (${GREP_CMD} -q -i -F 'Redistributions of source code must retain the above copyright' "$LIB_LICENSE" && ${GREP_CMD} -q -i -F 'Redistributions in binary form must reproduce' "$LIB_LICENSE" && echo "BSD 2-clause") || + (${GREP_CMD} -q -i -F 'Permission to use, copy, modify, and distribute this software' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'documentation for any purpose and without fee is hereby granted' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'the above copyright notice appear in all copies and that both that copyright' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'notice and this permission notice appear in supporting documentation' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'not be used in advertising or publicity pertaining' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'distribution of the software without specific, written prior permission' "$LIB_LICENSE" && + ${GREP_CMD} -q -i -F 'makes no representations about the suitability of this software' "$LIB_LICENSE" && + echo "HPND") || echo "Unknown") RELATIVE_PATH=$(echo "$LIB_LICENSE" | sed -r -e 's!^.+/contrib/!/contrib/!') diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f3aabeec87e..653a0cd5388 100644 --- a/utils/list-versions/version_date.tsv +++ 
b/utils/list-versions/version_date.tsv @@ -1,3 +1,5 @@ +v23.4.2.11-stable 2023-05-02 +v23.4.1.1943-stable 2023-04-27 v23.3.2.37-lts 2023-04-22 v23.3.1.2823-lts 2023-03-31 v23.2.6.34-stable 2023-04-23 diff --git a/utils/security-generator/generate_security.py b/utils/security-generator/generate_security.py index d25612e8bc6..83180ccce1c 100755 --- a/utils/security-generator/generate_security.py +++ b/utils/security-generator/generate_security.py @@ -48,17 +48,20 @@ A public disclosure date is negotiated by the ClickHouse maintainers and the bug """ -def generate_supported_versions(): +def generate_supported_versions() -> str: with open(VERSIONS_FILE, "r", encoding="utf-8") as fd: versions = [line.split(maxsplit=1)[0][1:] for line in fd.readlines()] # The versions in VERSIONS_FILE are ordered ascending, so the first one is # the greatest one. We may have supported versions in the previous year - unsupported_year = int(versions[0].split(".", maxsplit=1)[0]) - 2 - # 3 supported versions - supported = [] # type: List[str] - # 2 LTS versions, one of them could be in supported + greatest_year = int(versions[0].split(".", maxsplit=1)[0]) + unsupported_year = greatest_year - 2 + # 3 regular versions + regular = [] # type: List[str] + max_regular = 3 + # 2 LTS versions, one of them could be in regular lts = [] # type: List[str] + max_lts = 2 # The rest are unsupported unsupported = [] # type: List[str] table = [ @@ -69,18 +72,21 @@ def generate_supported_versions(): year = int(version.split(".")[0]) month = int(version.split(".")[1]) version = f"{year}.{month}" - if version in supported or version in lts: + to_append = "" + if version in regular or version in lts: continue - if len(supported) < 3: - supported.append(version) - if len(lts) < 2 and month in [3, 8]: - # The version can be LTS as well - lts.append(version) - table.append(f"| {version} | ✔️ |") - continue - if len(lts) < 2 and month in [3, 8]: + if len(regular) < max_regular: + regular.append(version) + to_append = f"| {version} | ✔️ |" + if len(lts) < max_lts and month in [3, 8]: lts.append(version) - table.append(f"| {version} | ✔️ |") + to_append = f"| {version} | ✔️ |" + if to_append: + if len(regular) == max_regular and len(lts) == max_lts: + # if we reached the max number of supported versions, the rest + # are unsopported, so year.* will be used + unsupported_year = min(greatest_year - 1, year) + table.append(to_append) continue if year <= unsupported_year: # The whole year is unsopported @@ -92,7 +98,7 @@ def generate_supported_versions(): return "\n".join(table) + "\n" -def main(): +def main() -> None: print(HEADER) print(generate_supported_versions()) print(FOOTER) diff --git a/utils/tests-visualizer/index.html b/utils/tests-visualizer/index.html index 11b2d6504e4..b2db5dbed33 100644 --- a/utils/tests-visualizer/index.html +++ b/utils/tests-visualizer/index.html @@ -20,9 +20,7 @@ width: 130px; display: block; margin: 30px auto; - -webkit-animation: spin 2s ease-in-out infinite; - -moz-animation: spin 2s ease-in-out infinite; - animation: spin 2s ease-in-out infinite; + animation: spin 10s ease-in-out infinite; } h1 { @@ -45,16 +43,9 @@ cursor: pointer; } - @-moz-keyframes spin { - 100% { -moz-transform: rotate(360deg); } - } - - @-webkit-keyframes spin { - 100% { -webkit-transform: rotate(360deg); } - } - @keyframes spin { - 100% { transform:rotate(360deg); } + 50% { transform:scale(150%); } + 100% { transform:scale(100%); } } @@ -67,33 +58,26 @@